initial commit
commit 69a1c2fd2e
285 changed files with 75894 additions and 0 deletions
8
AUTHORS.md
Normal file
@@ -0,0 +1,8 @@
Unified Platform source code is owned and published by HSE University.

Unified Platform source code authors and project participants:

Valentin Polezhaev <valentin.polezhaev@gmail.com>
Anton Khritankov <anton.khritankov@gmail.com>
Nikita Klimin <nklimin007@gmail.com>
Alexey Masyutin <alexey.masyutin@gmail.com>
202
LICENSE.txt
Normal file
@@ -0,0 +1,202 @@

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2023 - 2025 HSE University

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
79
README.md
Normal file
@@ -0,0 +1,79 @@
# Unified library of software and analytical tools (framework) of the HSE University AI Center

The unified library of software and analytical tools
for solving applied problems within the scope of the AI Center project teams
is based on the **HSE University Unified MLOps framework (Unified Platform)**.

This repository contains the source code of the **HSE University Unified MLOps framework (Unified Platform)**.

The Unified Platform framework simplifies and speeds up
the development and application of machine learning models and AI applications.

## Features
- Productionization of machine learning models;
- Pipelines for training and fine-tuning models;
- Deployment of AI applications;
- Storage of and access to datasets;
- Use of cloud computing resources;
- Multi-user access.

## Getting started

### HSE University SmartMLOps

To get acquainted with the framework, it is enough
to use the [HSE University SmartMLOps](https://mlops.hse.ru/) platform,
which is built on top of the framework.

### Installation on Kubernetes

The framework can serve as the basis for deploying your own MLOps platforms.
To do so, the framework must be hosted on a cloud computing platform or an internal cloud
with a supported Kubernetes distribution available.

To deploy your own MLOps platform based on the framework, see the installation [guide](./deploy/README.md).

## Documentation

For operators and administrators:
- Framework [installation](./deploy/README.md) guide;

For developers of machine learning models and AI applications:
- Guides on [using the framework](https://platform-forgejo.stratpro.hse.ru/mlops_platform/documentation)

For developers:
- [Build](./docs/building.md) documentation
- [Testing](./docs/testing.md) documentation

## Help and support

A [feedback](https://mlops.hse.ru/support) form is available for requests and questions.

## HSE University SmartMLOps

The [HSE University SmartMLOps](https://mlops.hse.ru/) platform is intended for developing and integrating applied modules
that use artificial intelligence technologies,
for example, AI assistants in educational and administrative processes,
in management, medicine, and other domains. The system hosts intelligent services
used in production workflows, as well as experimental and educational models.
It can also host pilot deployments and adoption of R&D results from HSE University laboratories,
and serves as a venue for productionizing and commercializing the solutions being developed.

The framework is the foundation of the [HSE University SmartMLOps](https://mlops.hse.ru/) platform.

## Project sponsors

<div style="width:50%; margin-bottom: 25px">
<div style="text-align: center">
<h4>National Research University Higher School of Economics</h4>

The framework was developed with the support of
the National Research University Higher School of Economics

<img src="./docs/img/hse-logo.png" alt="hse-logo.png" width="100px"/>
</div>
</div>

## License

[Apache License 2.0](./LICENSE.txt)
27
build/Dockerfile
Normal file
@@ -0,0 +1,27 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: Utilities
# Authors: V. A. Polezhaev, A. S. Khritankov
# Created: 2024
# ============================================================
FROM python:3.10-slim-buster

USER root

WORKDIR /

ADD ./controller/requirements.txt /controller/requirements.txt

WORKDIR /controller
RUN pip install -r requirements.txt

ADD ./controller /controller

RUN useradd -m controller
RUN chown -R controller .

ENV PYTHONPATH "${PYTHONPATH}:/controller/src"

CMD kopf run ./src/main.py --verbose

USER controller
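Note (editorial, not part of the commit): the image's entry point, `kopf run ./src/main.py --verbose`, starts a kopf operator. A minimal sketch of the kind of handler module kopf expects is below; the resource group and plural are placeholders, not the project's actual custom resources.

```python
# Illustration only: a minimal kopf operator module of the form that
# `kopf run ./src/main.py` expects. The CRD coordinates are hypothetical.
import kopf


@kopf.on.create('unip.example.com', 'v1', 'examples')
def on_create(spec, name, namespace, logger, **kwargs):
    # kopf injects handler kwargs such as spec, name, namespace and logger
    logger.info(f'{namespace}/{name} created with spec {dict(spec)}')
```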
29
build/pipeline_validate_results.Dockerfile
Normal file
@@ -0,0 +1,29 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: Utilities
# Authors: V. A. Polezhaev, A. S. Khritankov
# Created: 2024
# ============================================================
FROM python:3.10-slim-buster

USER root

WORKDIR /

ADD ./controller/pipeline_validate_results_requirements.txt /pipeline_validate_results/pipeline_validate_results_requirements.txt

WORKDIR /pipeline_validate_results
RUN pip install -r pipeline_validate_results_requirements.txt

ADD ./controller/src/exp_pipeline/results /pipeline_validate_results/src/exp_pipeline/results
ADD ./controller/src/exp_pipeline/client_config.py /pipeline_validate_results/src/exp_pipeline/client_config.py
ADD ./controller/src/exp_pipeline/schema.py /pipeline_validate_results/src/exp_pipeline/schema.py
ADD ./controller/src/exp_pipeline/logs.py /pipeline_validate_results/src/exp_pipeline/logs.py
ADD ./controller/src/exp_pipeline/__init__.py /pipeline_validate_results/src/exp_pipeline/__init__.py

RUN useradd -m pipeline
RUN chown -R pipeline .

ENV PYTHONPATH "${PYTHONPATH}:/pipeline_validate_results/src"

USER pipeline
3
controller/.dockerignore
Normal file
@@ -0,0 +1,3 @@
.env
.env.sample
venv
67
controller/.env.sample
Normal file
@@ -0,0 +1,67 @@
UNIP_BOXES_S3_DEFAULT_USER_DATA_BUCKET=
UNIP_BOXES_S3_SECRET_NAME=
UNIP_BOXES_S3_DEFAULT_HOST=
UNIP_BOXES_S3_DEFAULT_REGION=

UNIP_DOMAIN=

UNIP_FILES_API=
UNIP_PIPELINES_API=
UNIP_PIPELINE_VALIDATION_IMAGE=
UNIP_PIPELINE_RUN_MODE=
UNIP_DATETIME_FORMAT=

UNIP_DATABASE_PIPELINES_USER=
UNIP_DATABASE_PIPELINES_PASSWORD=
UNIP_DATABASE_PIPELINES_HOST=
UNIP_DATABASE_PIPELINES_PORT=
UNIP_DATABASE_PIPELINES_DB=

UNIP_BOXES_CSI_S3_SECRET_NAME=
UNIP_BOXES_CSI_S3_STORAGE_CLASS=

ARGO_CD_USER=
ARGO_CD_PASSWORD=
ARGO_CD_ENABLED=
ARGO_CD_API=

OIDC_END_USERS_ISSUER_URL=
OIDC_END_USERS_REALM=
OIDC_END_USERS_CLIENT_ID=
OIDC_END_USERS_CLIENT_SECRET=
OIDC_END_USERS_ADMIN_CLIENT_ID=
OIDC_END_USERS_ADMIN_CLIENT_SECRET=
OIDC_END_USERS_AUD=end-users
OIDC_END_USERS_COOKIE_SECRET=

OIDC_ROBOTS_ISSUER_URL=
OIDC_ROBOTS_REALM=
OIDC_ROBOTS_CLIENT_ID=
OIDC_ROBOTS_CLIENT_SECRET=
OIDC_ROBOTS_ADMIN_CLIENT_ID=
OIDC_ROBOTS_ADMIN_CLIENT_SECRET=
OIDC_ROBOTS_AUD=robots

OIDC_KEYCLOAK_HOST=
OIDC_MANAGED_GROUPS_ROOT=
OIDC_CLUSTER_GROUP_NAME=
OIDC_MODIFY=

UNIP_REPOSITORY_KEYCLOAK_ROBOTS_CREDS_NAMESPACE=
UNIP_REPOSITORY_KEYCLOAK_ROBOTS_TOKENS_RENEWAL_PERIOD_MIN=

FILES_API_BASE_URL=
INGRESS_RULE_HOST=
INGRESS_BACKEND_SERVICE_NAME=
INGRESS_BACKEND_SERVICE_PORT=
UNIP_DATASET_CMP_CHECK_CONTROLLER_PERMISSIONS=
UNIP_DATASET_CMP_VALIDATE=

UNIP_API_CMP_CORS_ENABLED=
UNIP_API_CMP_APPS_CORS_ENABLED=
UNIP_API_CMP_CORS_ALLOW_METHODS=
UNIP_API_CMP_CORS_ALLOW_HEADERS=
UNIP_API_CMP_CORS_EXPOSE_HEADERS=
UNIP_API_CMP_CORS_ALLOW_ORIGIN=
UNIP_API_CMP_CORS_MAX_AGE=

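Note (editorial): the controller reads these settings from the process environment, optionally seeded from a local `.env` (see `load_dotenv()` in `controller/alembic/env.py` and the `os.getenv` calls in `controller/src/apicmp/config.py`). A minimal sketch of that pattern, assuming a `.env` copied from this sample:

```python
# Sketch only: the load_dotenv + os.getenv pattern used elsewhere in the
# controller; UNIP_DOMAIN and UNIP_PIPELINES_API come from .env.sample above.
import os

from dotenv import load_dotenv

load_dotenv()  # populates os.environ from ./.env if present

UNIP_DOMAIN = os.getenv('UNIP_DOMAIN')
UNIP_PIPELINES_API = os.getenv('UNIP_PIPELINES_API')
if not UNIP_DOMAIN:
    raise RuntimeError('UNIP_DOMAIN must be set')
```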
115
controller/alembic.ini
Normal file
@@ -0,0 +1,115 @@
# A generic, single database configuration.

[alembic]
# path to migration scripts.
# Use forward slashes (/) also on windows to provide an os agnostic path
script_location = alembic

# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s

# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory.
prepend_sys_path = .

# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python>=3.9 or backports.zoneinfo library.
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
# string value is passed to ZoneInfo()
# leave blank for localtime
# timezone =

# max length of characters to apply to the "slug" field
# truncate_slug_length = 40

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false

# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false

# version location specification; This defaults
# to alembic/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "version_path_separator" below.
# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions

# version path separator; As mentioned above, this is the character used to split
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
# Valid values for version_path_separator are:
#
# version_path_separator = :
# version_path_separator = ;
# version_path_separator = space
version_path_separator = os  # Use os.pathsep. Default configuration used for new projects.

# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false

# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8

# sqlalchemy.url is overwritten in env.py
# sqlalchemy.url = driver://user:pass@localhost/dbname


[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples

# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME

# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
# hooks = ruff
# ruff.type = exec
# ruff.executable = %(here)s/.venv/bin/ruff
# ruff.options = --fix REVISION_SCRIPT_FILENAME

# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
1
controller/alembic/README
Normal file
@@ -0,0 +1 @@
Generic single-database configuration with an async dbapi.
94
controller/alembic/env.py
Normal file
@@ -0,0 +1,94 @@
import asyncio
from logging.config import fileConfig

from alembic import context
from dotenv import load_dotenv
from sqlalchemy import pool
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import async_engine_from_config

load_dotenv()

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

from exp_pipeline.storage.db import SQLALCHEMY_DATABASE_URL as UNIP_DATABASE_PIPELINES_URI
config.set_main_option('sqlalchemy.url', UNIP_DATABASE_PIPELINES_URI)

# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# add your model's MetaData object here
# for 'autogenerate' support
from exp_pipeline.storage import models
target_metadata = models.Base.metadata
# target_metadata = None

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    This configures the context with just a URL
    and not an Engine, though an Engine is acceptable
    here as well. By skipping the Engine creation
    we don't even need a DBAPI to be available.

    Calls to context.execute() here emit the given string to the
    script output.

    """
    url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()


def do_run_migrations(connection: Connection) -> None:
    context.configure(connection=connection, target_metadata=target_metadata)

    with context.begin_transaction():
        context.run_migrations()


async def run_async_migrations() -> None:
    """In this scenario we need to create an Engine
    and associate a connection with the context.

    """

    connectable = async_engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    async with connectable.connect() as connection:
        await connection.run_sync(do_run_migrations)

    await connectable.dispose()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode."""

    asyncio.run(run_async_migrations())


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
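Note (editorial): with this `env.py`, online migrations run through an async engine (asyncpg) against the URL assembled in `exp_pipeline.storage.db`. They are normally driven by the `alembic` CLI from the `controller/` directory; an equivalent programmatic invocation, shown only as a sketch:

```python
# Sketch: programmatically run `alembic upgrade head` against the same
# alembic.ini / env.py the CLI would use; run from the controller/ directory
# with the UNIP_DATABASE_PIPELINES_* environment variables set.
from alembic.config import main as alembic_main

alembic_main(argv=['upgrade', 'head'])
```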
26
controller/alembic/script.py.mako
Normal file
@@ -0,0 +1,26 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}


def upgrade() -> None:
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    ${downgrades if downgrades else "pass"}
32
controller/alembic/versions/6b877d2a07a9_.py
Normal file
@@ -0,0 +1,32 @@
"""empty message

Revision ID: 6b877d2a07a9
Revises: bc8a32d54029
Create Date: 2024-08-21 14:36:12.101684

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision: str = '6b877d2a07a9'
down_revision: Union[str, None] = 'bc8a32d54029'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('pipeline_trials', sa.Column('next_tracking_id', sa.String(), nullable=True))
    op.drop_column('pipeline_trials', 'status_date_time')
    # ### end Alembic commands ###


def downgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('pipeline_trials', sa.Column('status_date_time', postgresql.TIMESTAMP(), autoincrement=False, nullable=False))
    op.drop_column('pipeline_trials', 'next_tracking_id')
    # ### end Alembic commands ###
30
controller/alembic/versions/8430ab0a47af_.py
Normal file
@@ -0,0 +1,30 @@
"""empty message

Revision ID: 8430ab0a47af
Revises:
Create Date: 2024-07-29 10:42:57.684023

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = '8430ab0a47af'
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column('pipeline_trials', 'tracking_token')
    # ### end Alembic commands ###


def downgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('pipeline_trials', sa.Column('tracking_token', sa.VARCHAR(), autoincrement=False, nullable=False))
    # ### end Alembic commands ###
32
controller/alembic/versions/bc8a32d54029_.py
Normal file
@@ -0,0 +1,32 @@
"""Variable values are associated with the trial root

Revision ID: bc8a32d54029
Revises: 8430ab0a47af
Create Date: 2024-08-14 16:38:05.542362

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision: str = 'bc8a32d54029'
down_revision: Union[str, None] = '8430ab0a47af'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('pipeline_trials', sa.Column('vars', postgresql.JSONB(astext_type=sa.Text()), nullable=True))
    op.drop_column('pipeline_trials', 'stages')
    # ### end Alembic commands ###


def downgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('pipeline_trials', sa.Column('stages', postgresql.JSONB(astext_type=sa.Text()), autoincrement=False, nullable=False))
    op.drop_column('pipeline_trials', 'vars')
    # ### end Alembic commands ###
2
controller/pipeline_validate_results_requirements.txt
Normal file
@@ -0,0 +1,2 @@
requests>=2.31
pydantic>=2.6
17
controller/requirements.txt
Normal file
@@ -0,0 +1,17 @@
jinja2>=3.1
kopf>=1.37
kubernetes>=29
python-dotenv>=1.0.1
minio>=7.2
fastapi>=0.110
hypercorn>=0.16
sqlalchemy>=2.0.28
asyncpg>=0.29
aws-request-signer>=1.2
httpx>=0.27
kubernetes-asyncio>=29
lxml>=5.2
jsonschema>=4.23
bcrypt>=4
packaging
pydantic-settings
0
controller/src/apicmp/__init__.py
Normal file
25
controller/src/apicmp/config.py
Normal file
@@ -0,0 +1,25 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: APIComponent
# Authors: V. A. Polezhaev, A. S. Khritankov
# Created: 2025
# ============================================================
import os


_UNIP_API_CMP_CORS_ENABLED = os.getenv('UNIP_API_CMP_CORS_ENABLED', 'False')
UNIP_API_CMP_CORS_ENABLED = _UNIP_API_CMP_CORS_ENABLED in ('True', 'true')
_UNIP_API_CMP_APPS_CORS_ENABLED = os.getenv('UNIP_API_CMP_APPS_CORS_ENABLED')
if _UNIP_API_CMP_APPS_CORS_ENABLED is None:
    # fall back to the global CORS switch when the apps-specific one is unset
    UNIP_API_CMP_APPS_CORS_ENABLED = UNIP_API_CMP_CORS_ENABLED
else:
    UNIP_API_CMP_APPS_CORS_ENABLED = _UNIP_API_CMP_APPS_CORS_ENABLED in ('True', 'true')

UNIP_API_CMP_CORS_ALLOW_METHODS = os.getenv('UNIP_API_CMP_CORS_ALLOW_METHODS')
UNIP_API_CMP_CORS_ALLOW_HEADERS = os.getenv('UNIP_API_CMP_CORS_ALLOW_HEADERS')
UNIP_API_CMP_CORS_EXPOSE_HEADERS = os.getenv('UNIP_API_CMP_CORS_EXPOSE_HEADERS')
UNIP_API_CMP_CORS_ALLOW_ORIGIN = os.getenv('UNIP_API_CMP_CORS_ALLOW_ORIGIN')
_api_cmp_cors_max_age = os.getenv('UNIP_API_CMP_CORS_MAX_AGE')
UNIP_API_CMP_CORS_MAX_AGE = int(_api_cmp_cors_max_age) if _api_cmp_cors_max_age else None
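Note (editorial): `handlers.py` (suppressed below) is where these settings are consumed. Purely as an illustration of how such values can be applied, here is a hypothetical wiring into Starlette's CORSMiddleware; this is an assumption, not the suppressed module's actual code.

```python
# Assumption: NOT the suppressed handlers.py, just one plausible use of the
# UNIP_API_CMP_CORS_* settings with FastAPI/Starlette.
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from apicmp import config

app = FastAPI()

if config.UNIP_API_CMP_CORS_ENABLED:
    app.add_middleware(
        CORSMiddleware,
        allow_origins=[config.UNIP_API_CMP_CORS_ALLOW_ORIGIN or '*'],
        allow_methods=(config.UNIP_API_CMP_CORS_ALLOW_METHODS or 'GET').split(','),
        allow_headers=(config.UNIP_API_CMP_CORS_ALLOW_HEADERS or '').split(','),
        expose_headers=(config.UNIP_API_CMP_CORS_EXPOSE_HEADERS or '').split(','),
        max_age=config.UNIP_API_CMP_CORS_MAX_AGE or 600,
    )
```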
1051
controller/src/apicmp/handlers.py
Normal file
File diff suppressed because it is too large
21
controller/src/apicmp/jinja.py
Normal file
@@ -0,0 +1,21 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: APIComponent
# Authors: V. A. Polezhaev, A. S. Khritankov
# Created: 2024
# ============================================================
from jinja2 import Environment, FileSystemLoader

basic_jinja_env = Environment(
    loader=FileSystemLoader('templates/basic-resources'),
    lstrip_blocks=True,
    trim_blocks=True
)


api_cmp_jinja_env = Environment(
    loader=FileSystemLoader('templates/api-component'),
    lstrip_blocks=True,
    trim_blocks=True
)
67
controller/src/auth.py
Normal file
@@ -0,0 +1,67 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: Utilities
# Authors: V. A. Polezhaev, A. S. Khritankov
# Created: 2024
# ============================================================
import base64
import logging
from string import ascii_uppercase, ascii_lowercase

import bcrypt
from fastapi import HTTPException
from starlette.requests import Request

from exp_pipeline.client_config import INTERNAL_API_USER_ID

# https://datatracker.ietf.org/doc/html/rfc3986#section-2.3
ALLOWED_USER_NAME_SYMBOLS = set(ascii_uppercase) | set(ascii_lowercase) | set('0123456789') | {'-', '.', '_', '~'}


logger = logging.getLogger(__name__)


def _get_user_id_from_basic(basic):
    try:
        credentials = base64.b64decode(basic).decode(encoding='utf-8')
        sep_index = credentials.index(':')
    except ValueError:
        raise HTTPException(status_code=401, detail='Invalid Basic Authorization credentials format')
    except Exception:
        raise HTTPException(status_code=401, detail='Invalid Basic Authorization credentials')
    user_name = credentials[:sep_index]
    return user_name


def get_user_id(request: Request):
    if 'Authorization' not in request.headers:
        raise HTTPException(status_code=401, detail='User identity not passed')
    authorization = request.headers['Authorization']
    parts = authorization.split(' ')
    if parts[0] != 'Basic' or len(parts) < 2:
        raise HTTPException(status_code=401, detail='Only the Basic Authorization scheme is supported now')
    # the guard above guarantees a Basic scheme with a credentials part
    user_name = _get_user_id_from_basic(parts[1])
    if not user_name or any(s not in ALLOWED_USER_NAME_SYMBOLS for s in user_name):
        raise HTTPException(status_code=401, detail='Invalid user name')
    return user_name


def allowed_to_platform_user(request: Request):
    user_name = get_user_id(request)
    if user_name != INTERNAL_API_USER_ID:
        raise HTTPException(status_code=403, detail=f'Forbidden for user {user_name}')


def create_htpasswd_password(password: str) -> str:
    return bcrypt.hashpw(password.encode('utf-8'),
                         bcrypt.gensalt(rounds=5, prefix=b'2b')).decode('utf-8')


def get_basic_auth_credentials(credentials_lines: str) -> tuple[str, str]:
    records = base64.b64decode(credentials_lines).decode('utf-8').split('\n')
    first_record = records[0]
    sep_index = first_record.find(':')
    user_id, user_password = first_record[:sep_index], first_record[sep_index + 1:]
    return user_id, user_password
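Note (editorial, not in the commit): `get_user_id` takes a Starlette `Request`, so it can be attached to a FastAPI route via `Depends`. The route below is hypothetical, shown only to illustrate the intended call pattern.

```python
# Hypothetical route showing get_user_id as a FastAPI dependency.
from fastapi import Depends, FastAPI

from auth import get_user_id

app = FastAPI()


@app.get('/whoami')
def whoami(user_id: str = Depends(get_user_id)):
    # get_user_id parses the Basic Authorization header and rejects
    # user names outside ALLOWED_USER_NAME_SYMBOLS with a 401.
    return {'user': user_id}
```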
0
controller/src/basic_resources/__init__.py
Normal file
64
controller/src/basic_resources/deployments.py
Normal file
@@ -0,0 +1,64 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: Basic Kubernetes object management
# Authors: V. A. Polezhaev, A. S. Khritankov
# Created: 2025
# ============================================================
from datetime import datetime, timezone

from kubernetes.client import ApiClient, ApiException, AppsV1Api


def delete_deployment_if_exists(api_client: ApiClient, namespace: str, deployment_name: str, logger):
    apps_v1_api = AppsV1Api(api_client)
    try:
        apps_v1_api.delete_namespaced_deployment(name=deployment_name, namespace=namespace)
    except ApiException as exc:
        if exc.status == 404:
            logger.info(f"Deployment {deployment_name} doesn't exist, do nothing")
            return
        raise exc
    logger.info(f"Deployment {deployment_name} is deleted")


def create_deployment(api_client: ApiClient, namespace, manifest):
    apps_v1_api = AppsV1Api(api_client)
    apps_v1_api.create_namespaced_deployment(namespace=namespace, body=manifest)


def create_or_update_deployment(api_client: ApiClient, name: str, namespace: str, manifest, logger):
    apps_v1_api = AppsV1Api(api_client)

    try:
        apps_v1_api.create_namespaced_deployment(namespace=namespace, body=manifest)
        logger.info(f"Deployment {name} is created")
    except ApiException as exc:
        if exc.status != 409:
            raise exc
        logger.warning(f'Deployment {name} already exists, will be replaced')
        apps_v1_api.replace_namespaced_deployment(name=name, namespace=namespace, body=manifest)
        logger.info(f"Deployment {name} is replaced")


def restart_deployment_if_exists(api_client: ApiClient, name: str, namespace: str, logger):
    apps_v1_api = AppsV1Api(api_client)
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    now = now.isoformat("T") + "Z"
    # the same annotation `kubectl rollout restart` sets on the pod template
    patch = {
        'spec': {
            'template': {
                'metadata': {
                    'annotations': {
                        'kubectl.kubernetes.io/restartedAt': now
                    }
                }
            }
        }
    }
    try:
        apps_v1_api.patch_namespaced_deployment(name, namespace, patch)
    except ApiException as exc:
        if exc.status == 404:
            logger.warning(f"Deployment {namespace}.{name} doesn't exist, do nothing")
            return
        raise exc
    logger.info(f"Deployment {namespace}.{name} scheduled for restart")
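Note (editorial): a usage sketch for these helpers; the deployment name, namespace, and kubeconfig source below are assumptions, not project configuration.

```python
# Sketch: trigger a rollout restart using the helper above from outside the
# cluster; inside a pod, config.load_incluster_config() would be used instead.
import logging

from kubernetes import config
from kubernetes.client import ApiClient

from basic_resources.deployments import restart_deployment_if_exists

logging.basicConfig(level=logging.INFO)
config.load_kube_config()  # reads ~/.kube/config

with ApiClient() as api_client:
    restart_deployment_if_exists(api_client,
                                 name='example-app',
                                 namespace='default',
                                 logger=logging.getLogger(__name__))
```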
47
controller/src/basic_resources/ingresses.py
Normal file
@@ -0,0 +1,47 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: Basic Kubernetes object management
# Authors: V. A. Polezhaev, A. S. Khritankov
# Created: 2024
# ============================================================
from kubernetes.client import ApiClient, NetworkingV1Api, ApiException


def delete_ingress_if_exists(api_client: ApiClient, name: str, namespace: str,
                             logger):
    networking_v1_api = NetworkingV1Api(api_client)

    try:
        networking_v1_api.delete_namespaced_ingress(name=name, namespace=namespace)
    except ApiException as exc:
        if exc.status == 404:
            logger.info(f"Ingress {name} doesn't exist, do nothing")
            return
        raise exc
    logger.info(f"Ingress {name} is deleted")


def create_or_update_ingress(api_client: ApiClient, name: str, namespace: str, manifest,
                             logger):
    networking_v1_api = NetworkingV1Api(api_client)

    try:
        networking_v1_api.create_namespaced_ingress(namespace=namespace, body=manifest)
        logger.info(f"Ingress {name} is created")
    except ApiException as exc:
        if exc.status != 409:
            raise exc
        logger.warning(f'Ingress {name} already exists, will be replaced')
        networking_v1_api.replace_namespaced_ingress(name=name, namespace=namespace, body=manifest)
        logger.info(f"Ingress {name} is replaced")


def read_ingress_if_exists(api_client: ApiClient, name, namespace, logger):
    networking_v1_api = NetworkingV1Api(api_client)
    try:
        ingress = networking_v1_api.read_namespaced_ingress(name=name, namespace=namespace)
        return ingress
    except ApiException as exc:
        if exc.status == 404:
            logger.info(f'Ingress {namespace}.{name} does not exist')
            return None
        raise exc
14
controller/src/basic_resources/jinja.py
Normal file
@@ -0,0 +1,14 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: Basic Kubernetes object management
# Authors: V. A. Polezhaev, A. S. Khritankov
# Created: 2024
# ============================================================
from jinja2 import Environment, FileSystemLoader

basic_jinja_env = Environment(
    loader=FileSystemLoader('templates/basic-resources'),
    lstrip_blocks=True,
    trim_blocks=True
)
45
controller/src/basic_resources/jobs.py
Normal file
@@ -0,0 +1,45 @@
from kubernetes.client import BatchV1Api, ApiClient, ApiException

from exceptions import ObjectAlreadyExistsTemporaryError


def create_cron_job(api_client: ApiClient, name, namespace, manifest, logger):
    batch_v1_api = BatchV1Api(api_client)
    try:
        batch_v1_api.create_namespaced_cron_job(namespace, body=manifest)
    except ApiException as exc:
        if exc.status == 409:
            raise ObjectAlreadyExistsTemporaryError
        raise exc


def update_cron_job(api_client: ApiClient, name, namespace, manifest, logger):
    batch_v1_api = BatchV1Api(api_client)
    batch_v1_api.replace_namespaced_cron_job(name=name, namespace=namespace, body=manifest)


def delete_cron_job_if_exists(api_client: ApiClient, name, namespace, logger):
    batch_v1_api = BatchV1Api(api_client)
    try:
        batch_v1_api.delete_namespaced_cron_job(name=name, namespace=namespace)
    except ApiException as exc:
        if exc.status == 404:
            logger.info(f"CronJob {name} doesn't exist, do nothing")
            return
        raise exc
    logger.info(f"CronJob {name} is deleted")


def create_or_update_cron_job(api_client: ApiClient, name, namespace, manifest, logger):
    try:
        create_cron_job(api_client=api_client,
                        name=name,
                        namespace=namespace,
                        manifest=manifest,
                        logger=logger)
    except ObjectAlreadyExistsTemporaryError:
        update_cron_job(api_client=api_client,
                        name=name,
                        namespace=namespace,
                        manifest=manifest,
                        logger=logger)
36
controller/src/basic_resources/namespaces.py
Normal file
@@ -0,0 +1,36 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: Basic Kubernetes object management
# Authors: V. A. Polezhaev, A. S. Khritankov
# Created: 2024
# ============================================================
"""
Namespaces basic resources module. Implements reusable namespace-related functions.
"""

from kubernetes.client import CoreV1Api, ApiException


def create_namespace(api_client, name, manifest, logger):
    """
    Creates a new namespace with the given name
    according to the supplied manifest.

    The function is idempotent.
    If the namespace already exists, it is updated.
    If the namespace does not exist, it is created.

    :param api_client: Kubernetes API client
    :param name: Namespace name
    :param manifest: Namespace specification
    :param logger: kopf logger
    :return: None
    """
    core_v1_api = CoreV1Api(api_client)
    try:
        core_v1_api.create_namespace(body=manifest)
    except ApiException as exc:
        if exc.status == 409:
            core_v1_api.replace_namespace(name=name, body=manifest)
            return
        raise exc
44
controller/src/basic_resources/network_policies.py
Normal file
@@ -0,0 +1,44 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: Basic Kubernetes object management
# Authors: V. A. Polezhaev, A. S. Khritankov
# Created: 2024
# ============================================================
"""
Network policies module. Implements reusable network-policy-related functions.
"""

from kubernetes.client import NetworkingV1Api, ApiException, ApiClient


def create_network_policy(api_client: ApiClient, manifest, namespace, logger):
    networking_v1_api = NetworkingV1Api(api_client)
    np = networking_v1_api.create_namespaced_network_policy(namespace=namespace, body=manifest)
    logger.info(f"NetworkPolicy is created: {np}")


def delete_network_policy_if_exists(api_client: ApiClient, np_name, namespace, logger):
    networking_v1_api = NetworkingV1Api(api_client)
    try:
        networking_v1_api.delete_namespaced_network_policy(name=np_name, namespace=namespace)
    except ApiException as exc:
        if exc.status == 404:
            logger.info(f"NetworkPolicy {np_name} doesn't exist, do nothing")
            return
        raise exc
    logger.info(f"NetworkPolicy {np_name} is deleted")


def create_or_update_network_policy(api_client: ApiClient, name: str, namespace: str, manifest,
                                    logger):
    networking_v1_api = NetworkingV1Api(api_client)

    try:
        networking_v1_api.create_namespaced_network_policy(namespace=namespace, body=manifest)
        logger.info(f"Network policy {name} is created")
    except ApiException as exc:
        if exc.status != 409:
            raise exc
        logger.warning(f'Network policy {name} already exists, will be replaced')
        networking_v1_api.replace_namespaced_network_policy(name=name, namespace=namespace, body=manifest)
        logger.info(f"Network policy {name} is replaced")
148
controller/src/basic_resources/rbac.py
Normal file
@@ -0,0 +1,148 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: Basic Kubernetes object management
# Authors: V. A. Polezhaev, A. S. Khritankov
# Created: 2024
# ============================================================
"""
RBAC basic resources module. Implements reusable RBAC-related functions.
"""

import kopf
from kubernetes.client import RbacAuthorizationV1Api, ApiException, CoreV1Api, ApiClient

_DEFAULT_SA_NAME = 'default'


def update_default_sa(api_client, namespace, manifest, logger):
    """
    Updates the 'default' service account (ServiceAccount).

    The function is idempotent.

    If the service account is missing, the handler raises
    kopf.TemporaryError, which makes kopf retry the call
    after the given delay.

    If the service account exists, its specification is
    replaced with the one passed in `manifest`.

    :param api_client: Kubernetes API client
    :param namespace: Namespace
    :param manifest: Service account specification
    :param logger: kopf logger
    :return: None.
    """
    core_v1_api = CoreV1Api(api_client)
    try:
        core_v1_api.read_namespaced_service_account(_DEFAULT_SA_NAME, namespace)
    except ApiException as exc:
        if exc.status == 404:
            # the SA named 'default' always exists and is created by Kubernetes;
            # if it is missing, we have to wait
            logger.info(f'SA {_DEFAULT_SA_NAME} does not exist.')
            raise kopf.TemporaryError(f"Waiting for SA {_DEFAULT_SA_NAME} "
                                      f"to be created by Kubernetes system...",
                                      delay=10)
        raise exc
    logger.info(f"SA {_DEFAULT_SA_NAME} exists, trying to replace...")
    core_v1_api.replace_namespaced_service_account(name=_DEFAULT_SA_NAME, namespace=namespace, body=manifest)


def delete_sa_if_exists(api_client: ApiClient, name, namespace, logger):
    core_v1_api = CoreV1Api(api_client)
    try:
        # pointless for the default SA - it will be recreated by Kubernetes;
        # kept for consistency and in case the SA name changes
        core_v1_api.delete_namespaced_service_account(name=name, namespace=namespace)
    except ApiException as exc:
        if exc.status == 404:
            logger.warning(f"SA {name} doesn't exist, do nothing")
            return
        raise exc
    logger.info(f"SA {name} is deleted")


def create_role(api_client, name, namespace, manifest):
    """
    Creates a role (Role).

    The function is idempotent. If the role exists,
    it will be updated.

    :param api_client: Kubernetes API client
    :param name: Role name
    :param namespace: Namespace
    :param manifest: Role specification
    :return: None.
    """
    rbac_v1_api = RbacAuthorizationV1Api(api_client)
    rbac_v1_api.replace_namespaced_role(name=name, namespace=namespace, body=manifest)


def delete_role_if_exists(api_client, name, namespace, logger):
    """
    Deletes a role (Role) if it exists.

    The function is idempotent.
    If the role exists, it will be deleted.
    If the role does not exist, nothing happens.

    :param api_client: Kubernetes API client
    :param name: Role name
    :param namespace: Namespace
    :param logger: kopf logger
    :return: None
    """
    rbac_v1_api = RbacAuthorizationV1Api(api_client)
    try:
        rbac_v1_api.delete_namespaced_role(name=name, namespace=namespace)
    except ApiException as exc:
        if exc.status == 404:
            logger.warning(f"Role {name} doesn't exist, do nothing")
            return
        raise exc
    logger.info(f"Role {name} is deleted")


def create_rb(api_client, name, namespace, manifest):
    """
    Creates a role binding (RoleBinding).

    The function is idempotent. If the role binding exists,
    it will be updated.

    :param api_client: Kubernetes API client
    :param name: Role binding name
    :param namespace: Namespace
    :param manifest: Role binding specification
    :return: None.
    """
    rbac_v1_api = RbacAuthorizationV1Api(api_client)
    rbac_v1_api.replace_namespaced_role_binding(name=name, namespace=namespace, body=manifest)


def delete_rb_if_exists(api_client, name, namespace, logger):
    """
    Deletes a role binding (RoleBinding) if it exists.

    The function is idempotent.
    If the role binding exists, it will be deleted.
    If the role binding does not exist, nothing happens.

    :param api_client: Kubernetes API client
    :param name: Role binding name
    :param namespace: Namespace
    :param logger: kopf logger
    :return: None
    """
    rbac_v1_api = RbacAuthorizationV1Api(api_client)
    try:
        rbac_v1_api.delete_namespaced_role_binding(name=name,
                                                   namespace=namespace)
    except ApiException as exc:
        if exc.status == 404:
            logger.warning(f"RoleBinding {name} doesn't exist, do nothing")
            return
        raise exc
    logger.info(f"RoleBinding {name} is deleted")
193
controller/src/basic_resources/secrets.py
Normal file
193
controller/src/basic_resources/secrets.py
Normal file
|
|
@ -0,0 +1,193 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: Basic Kubernetes objects management
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
import base64

import yaml
from kubernetes.client import ApiClient, CoreV1Api, ApiException, V1Secret, V1SecretList

from basic_resources.jinja import basic_jinja_env
from exceptions import ObjectAlreadyExistsTemporaryError

SECRET_TEMPLATE = 'secret.yaml'


def prepare_secret_manifest(name, namespace, type_, data: list[dict], data_attr='data', labels=None, annotations=None):
    template = basic_jinja_env.get_template(SECRET_TEMPLATE)
    variables = {
        'name': name,
        'namespace': namespace,
        'type': type_,
        'data': data,
        'data_attr': data_attr
    }
    if labels:
        variables['labels'] = labels
    if annotations:
        variables['annotations'] = annotations
    text = template.render(variables)
    manifest = yaml.safe_load(text)
    return manifest


def create_secret_if_not_exists(api_client: ApiClient, name, namespace, manifest, logger):
    core_v1_api = CoreV1Api(api_client)
    try:
        core_v1_api.read_namespaced_secret(name, namespace)
    except ApiException as exc:
        if exc.status == 404:
            core_v1_api.create_namespaced_secret(namespace, body=manifest)
            return
        raise exc
    logger.info(f"Secret {name} already exists, do nothing")


def try_create_secret(api_client: ApiClient, name, namespace, manifest, logger):
    core_v1_api = CoreV1Api(api_client)
    try:
        core_v1_api.create_namespaced_secret(namespace, body=manifest)
    except ApiException as exc:
        if exc.status == 409:
            logger.info(f"Secret {name} already exists, do nothing")
            return
        raise exc


def delete_secret_if_exists(api_client: ApiClient, name, namespace, logger):
    core_v1_api = CoreV1Api(api_client)
    try:
        core_v1_api.delete_namespaced_secret(name=name, namespace=namespace)
    except ApiException as exc:
        if exc.status == 404:
            logger.info(f"Secret {name} doesn't exist, do nothing")
            return
        raise exc
    logger.info(f"Secret {name} is deleted")


def create_secret(api_client: ApiClient, name, namespace, manifest, logger):
    core_v1_api = CoreV1Api(api_client)
    try:
        core_v1_api.create_namespaced_secret(namespace, body=manifest)
    except ApiException as exc:
        if exc.status == 409:
            raise ObjectAlreadyExistsTemporaryError
        raise exc


def update_secret(api_client: ApiClient, name, namespace, manifest, logger):
    core_v1_api = CoreV1Api(api_client)
    core_v1_api.replace_namespaced_secret(name=name, namespace=namespace, body=manifest)


def create_or_update_secret(api_client: ApiClient, name, namespace, manifest, logger):
    try:
        create_secret(api_client=api_client,
                      name=name,
                      namespace=namespace,
                      manifest=manifest,
                      logger=logger)
    except ObjectAlreadyExistsTemporaryError:
        update_secret(api_client=api_client,
                      name=name,
                      namespace=namespace,
                      manifest=manifest,
                      logger=logger)


def read_secret_if_exists(api_client: ApiClient, name, namespace) -> V1Secret | None:
    core_v1_api = CoreV1Api(api_client)
    try:
        secret = core_v1_api.read_namespaced_secret(name, namespace)
        return secret
    except ApiException as exc:
        if exc.status == 404:
            return None
        raise exc


def list_secrets(api_client: ApiClient, namespace, labels: dict) -> V1SecretList:
    core_v1_api = CoreV1Api(api_client)
    label_selector = ','.join(f'{label}={value}' for label, value in labels.items()) if labels else None
    if label_selector:
        secrets = core_v1_api.list_namespaced_secret(namespace, label_selector=label_selector)
    else:
        secrets = core_v1_api.list_namespaced_secret(namespace)
    return secrets


def patch_secret_if_exists(api_client: ApiClient, name, namespace, patch) -> V1Secret | None:
    core_v1_api = CoreV1Api(api_client)
    try:
        secret = core_v1_api.patch_namespaced_secret(name, namespace, patch)
        return secret
    except ApiException as exc:
        if exc.status == 404:
            return None
        raise exc


def set_secret_annotations_and_labels(api_client: ApiClient, name, namespace, annotations, labels, logger):
    secret = read_secret_if_exists(api_client, name, namespace)
    if not secret:
        logger.warning(f'Secret {namespace}.{name} does not exist')
        return

    patch = {'metadata': {'annotations': annotations, 'labels': labels}}

    patch_secret_if_exists(api_client, name, namespace, patch)


def copy_secret_data(api_client: ApiClient, source_secret_body, dest_secret_name,
                     dest_namespace, logger):
    data = [{'key': key, 'value': value} for key, value in source_secret_body['data'].items()]
    manifest = prepare_secret_manifest(dest_secret_name, dest_namespace, source_secret_body['type'], data)

    create_or_update_secret(api_client=api_client,
                            name=dest_secret_name,
                            namespace=dest_namespace,
                            manifest=manifest,
                            logger=logger)


def copy_secret_silently(api_client: ApiClient, source_secret_name, source_namespace, dest_secret_name,
                         dest_namespace, logger):
    secret_obj = read_secret_if_exists(api_client, source_secret_name, source_namespace)
    if not secret_obj:
        logger.warning(f'source secret {source_namespace}.{source_secret_name} does not exist, '
                       f'copying skipped')
        return

    secret_body = secret_obj.to_dict()

    copy_secret_data(api_client, secret_body, dest_secret_name, dest_namespace, logger)


def read_basic_auth_credentials_from_secret(secret, secret_name, secret_namespace, logger,
                                            creds_attr_name='credentials'):
    secret_data = secret.to_dict()['data']
    if creds_attr_name not in secret_data:
        logger.warning(f'{creds_attr_name} field in secret {secret_namespace}.{secret_name} '
                       f'does not exist')
        return []

    secret_creds = secret_data[creds_attr_name]
    secret_creds_decoded = base64.b64decode(secret_creds).decode('utf-8').strip()

    if not secret_creds_decoded:
        logger.warning(f'auth field in secret {secret_namespace}.{secret_name} '
                       f'is empty')
        return []

    result = []
    for cred in secret_creds_decoded.split('\n'):
        if ':' not in cred:
            logger.warning(f'wrong credentials format in secret {secret_namespace}.{secret_name}, '
                           f'some credentials may be omitted')
            continue
        sep_index = cred.find(':')
        user, password = cred[:sep_index], cred[sep_index + 1:]
        result.append((user, password))
    return result
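A hedged round-trip sketch of the secret helpers above; the secret and namespace names are invented, and the 'secret.yaml' template is assumed to render a standard v1 Secret.

    # Illustrative only. 'czNjcjN0' is base64 for 's3cr3t', since the default
    # data_attr='data' expects base64-encoded values.
    def sync_demo_token(api_client, logger):
        manifest = prepare_secret_manifest(name='demo-token', namespace='demo-ns',
                                           type_='Opaque',
                                           data=[{'key': 'token', 'value': 'czNjcjN0'}])
        create_or_update_secret(api_client=api_client, name='demo-token',
                                namespace='demo-ns', manifest=manifest, logger=logger)
        secret = read_secret_if_exists(api_client, 'demo-token', 'demo-ns')
        return secret.data if secret else None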
38 controller/src/basic_resources/services.py (Normal file)
@@ -0,0 +1,38 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: Basic Kubernetes objects management
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2025
# ============================================================
from kubernetes.client import ApiClient, CoreV1Api, ApiException


def delete_svc_if_exists(api_client: ApiClient, namespace: str, svc_name: str, logger):
    core_v1_api = CoreV1Api(api_client)
    try:
        core_v1_api.delete_namespaced_service(name=svc_name, namespace=namespace)
    except ApiException as exc:
        if exc.status == 404:
            logger.info(f"Service {svc_name} doesn't exist, do nothing")
            return
        raise exc
    logger.info(f"Service {svc_name} is deleted")


def create_service(api_client: ApiClient, namespace, manifest):
    core_v1_api = CoreV1Api(api_client)
    core_v1_api.create_namespaced_service(namespace=namespace, body=manifest)


def create_or_update_service(api_client: ApiClient, name: str, namespace: str, manifest,
                             logger):
    core_v1_api = CoreV1Api(api_client)

    try:
        core_v1_api.create_namespaced_service(namespace=namespace, body=manifest)
        logger.info(f"Service {name} is created")
    except ApiException as exc:
        if exc.status == 409:
            logger.warning(f'Service {name} already exists, will be replaced')
            core_v1_api.replace_namespaced_service(name=name, namespace=namespace, body=manifest)
            logger.info(f"Service {name} is replaced")
        else:
            raise exc
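One caveat worth knowing when replacing Services, shown here as an assumed sketch rather than repository code: a PUT that omits immutable fields such as spec.clusterIP can be rejected by the API server, so a caller may want to carry identity fields over from the live object first (exact behavior varies by cluster version).

    # Hypothetical helper; assumes manifest is a plain dict-shaped v1 Service.
    def replace_service_preserving_identity(api_client, name, namespace, manifest, logger):
        core_v1_api = CoreV1Api(api_client)
        live = core_v1_api.read_namespaced_service(name=name, namespace=namespace)
        # Carry over identity fields that a bare replacement manifest would clear.
        manifest.setdefault('metadata', {})['resourceVersion'] = live.metadata.resource_version
        manifest.setdefault('spec', {})['clusterIP'] = live.spec.cluster_ip
        core_v1_api.replace_namespaced_service(name=name, namespace=namespace, body=manifest)
        logger.info(f"Service {name} is replaced")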
33 controller/src/bound.py (Normal file)
@@ -0,0 +1,33 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: Utilities
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
from config import UNIP_CONTROLLER_DEV


def not_dev_namespace(body, **_):
    if UNIP_CONTROLLER_DEV == 'True' or UNIP_CONTROLLER_DEV is True:
        return True
    metadata = body.get('metadata', None)
    if not metadata:
        return False
    namespace: str = metadata.get('namespace', None)
    if not namespace:
        return False
    if namespace.startswith('dev-'):
        return False
    return True


def in_pu_namespace(body, **_):
    metadata = body.get('metadata', None)
    if not metadata:
        return False
    namespace: str = metadata.get('namespace', None)
    if not namespace:
        return False
    if (namespace.startswith('pu-') or '-pu-' in namespace) and '-pa-' not in namespace:
        return True
    return False
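A quick illustration of how these kopf when= filters classify bodies (sample namespaces are invented):

    body_pu = {'metadata': {'namespace': 'pu-team1'}}
    body_pa = {'metadata': {'namespace': 'x-pu-team1-pa-app'}}
    print(in_pu_namespace(body_pu))  # True: 'pu-' prefix, no '-pa-' marker
    print(in_pu_namespace(body_pa))  # False: '-pa-' marks an application namespace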
0 controller/src/box/__init__.py (Normal file)
11 controller/src/box/config.py (Normal file)
@@ -0,0 +1,11 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: DataBox
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
import os

UNIP_BOXES_S3_DEFAULT_USER_DATA_BUCKET = os.getenv('UNIP_BOXES_S3_DEFAULT_USER_DATA_BUCKET')
UNIP_BOXES_CSI_S3_SECRET_NAME = os.getenv('UNIP_BOXES_CSI_S3_SECRET_NAME')
UNIP_BOXES_CSI_S3_STORAGE_CLASS = os.getenv('UNIP_BOXES_CSI_S3_STORAGE_CLASS')
877 controller/src/box/handlers.py (Normal file)
@@ -0,0 +1,877 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: DataBox
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
import base64
import uuid
from datetime import datetime, timezone
from typing import Sequence, Union

import kopf
import yaml
from kopf import PermanentError, TemporaryError
from kubernetes.client import CoreV1Api, V1Secret, ApiException, V1PersistentVolumeList, CustomObjectsApi
from minio import Minio

from bound import not_dev_namespace
from box.config import UNIP_BOXES_S3_DEFAULT_USER_DATA_BUCKET, UNIP_BOXES_CSI_S3_SECRET_NAME, \
    UNIP_BOXES_CSI_S3_STORAGE_CLASS
from box.jinja import jinja_env
from exceptions import InputValidationPermanentError, \
    ObjectDoesNotExistTemporaryError, InternalErrorPermanentError, InputValidationTemporaryError
from kopf_k8s_client import api_client

UNIP_CONTROLLER_NAMESPACE = 'unip-system-controller'

S3_PLATFORM_USER_SECRET_NAME = 'csi-s3-secret'

CSI_S3_PV_NAME_SUFFIX = '-csi-s3-pv'
CSI_S3_PVC_NAME_SUFFIX = '-csi-s3-pvc'
PV_FULL_NAME_LABEL = 'unified-platform.cs.hse.ru/pv-full-name'
PV_FULL_NAME_TEMPLATE = '{platform_app_name}-{box_name}{suffix}'

CSI_S3_PLATFORM_USER_STORAGE_CLASS_NAME_SUFFIX = '-csi-s3-sc'

BOX_OWNER_NAME = 'developer'

WAITING_PVC_READINESS = 'WaitingPVCReadiness'
WAITING_PV_READINESS = 'WaitingPVReadiness'
PVC_IS_READY = 'PVCIsReady'
PV_IS_READY = 'PVIsReady'
PVC_FAILED = 'PVCAllocationFailed'
PV_FAILED = 'PVAllocationFailed'
WAITING_PVC_IS_DELETED = 'WaitingPVCIsDeleted'
WAITING_PV_IS_DELETED = 'WaitingPVIsDeleted'
PVC_IS_DELETED = 'PVCIsDeleted'
PV_IS_DELETED = 'PVIsDeleted'

FAILURE = 'Failure'
READY = 'Ready'
DELETING = 'Deleting'

MAX_RETRIES = 10


class PVIsNotSet(PermanentError):
    pass


class PVorPVCAlreadyExists(PermanentError):
    pass


class PVorPVCAllocationTimeout(PermanentError):
    pass


class AllocationFailedError(PermanentError):
    pass


class AllocationTimeoutError(PermanentError):
    pass


class WaitingAllocationError(TemporaryError):
    pass


class WaitingDeletionError(TemporaryError):
    pass


def _prepare_manifest(template_name, variables):
    template = jinja_env.get_template(template_name)
    text = template.render(variables)
    data = yaml.safe_load(text)
    return data


def _check_box_has_valid_kind(spec):
    if 's3Storage' not in spec \
            and 's3DefaultStorage' not in spec \
            and 'datasetReference' not in spec:
        raise InputValidationPermanentError('Only s3Storage, s3DefaultStorage, datasetReference are implemented now')


def _get_s3_client_from_secret(secret_body: dict):
    if 'data' not in secret_body:
        raise InputValidationPermanentError(f'data not set in secret {S3_PLATFORM_USER_SECRET_NAME}')
    data = secret_body['data']
    if not (req_fields := {'accessKeyID', 'secretAccessKey', 'endpoint'}) <= data.keys():
        raise InputValidationPermanentError(f'some of {req_fields} not set in secret {S3_PLATFORM_USER_SECRET_NAME}')

    access_key = base64.b64decode(data['accessKeyID']).decode('utf-8')
    secret_key = base64.b64decode(data['secretAccessKey']).decode('utf-8')
    endpoint = base64.b64decode(data['endpoint']).decode('utf-8')
    endpoint = endpoint.replace('https://', '').replace('http://', '')

    s3_client = Minio(endpoint=endpoint, access_key=access_key, secret_key=secret_key)
    return s3_client
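A hedged sketch of the secret body _get_s3_client_from_secret expects; the values decode to 'minio', 'minio123' and 'http://minio:9000', all invented for illustration.

    fake_secret_body = {'data': {
        'accessKeyID': 'bWluaW8=',
        'secretAccessKey': 'bWluaW8xMjM=',
        'endpoint': 'aHR0cDovL21pbmlvOjkwMDA=',  # the scheme is stripped before Minio() is built
    }}
    client = _get_s3_client_from_secret(fake_secret_body)
    print(client.bucket_exists('user-data'))  # would query the assumed MinIO endpoint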


def _get_s3_client_using_s3_platform_user_creds(core_v1_api: CoreV1Api, namespace):
    try:
        secret: V1Secret = core_v1_api.read_namespaced_secret(name=S3_PLATFORM_USER_SECRET_NAME,
                                                              namespace=namespace)
        secret_body = secret.to_dict()
    except ApiException as exc:
        if exc.status == 404:
            raise InputValidationTemporaryError(f'CSI S3 Secret not found, used secret name: '
                                                f'{S3_PLATFORM_USER_SECRET_NAME}', delay=15)
        raise exc

    s3_client = _get_s3_client_from_secret(secret_body)
    return s3_client


def _get_s3_client_using_controller_creds(core_v1_api: CoreV1Api):
    try:
        secret: V1Secret = core_v1_api.read_namespaced_secret(name=UNIP_BOXES_CSI_S3_SECRET_NAME,
                                                              namespace=UNIP_CONTROLLER_NAMESPACE)
        secret_body = secret.to_dict()
    except ApiException as exc:
        if exc.status == 404:
            raise InputValidationPermanentError(f'Controller S3 Secret not found, used secret name: '
                                                f'{UNIP_BOXES_CSI_S3_SECRET_NAME}')
        raise exc

    s3_client = _get_s3_client_from_secret(secret_body)
    return s3_client


def _get_s3_box(co_api: CustomObjectsApi, name, namespace):
    try:
        box_obj = co_api.get_namespaced_custom_object(
            "unified-platform.cs.hse.ru", "v1", namespace,
            "databoxes", name)
    except ApiException as exc:
        if exc.status == 404:
            raise ObjectDoesNotExistTemporaryError(
                f"DatasetReference box {name} doesn't exist in namespace {namespace}")
        raise exc

    box_spec = box_obj['spec']
    if 's3Storage' not in box_spec and 's3DefaultStorage' not in box_spec:
        raise InputValidationPermanentError(f'Dataset box {namespace}.{name} '
                                            f'is not s3Storage and not s3DefaultStorage')
    return box_obj


def _get_dataset(co_api: CustomObjectsApi, spec, namespace):
    dataset_ref = spec['datasetReference']
    dataset_cmp_ref = dataset_ref['datasetComponentRef']
    dataset_name = dataset_cmp_ref['name']
    dataset_namespace = dataset_cmp_ref.get('namespace', namespace)

    try:
        dataset_obj = co_api.get_namespaced_custom_object(
            "unified-platform.cs.hse.ru", "v1", dataset_namespace,
            "datasetcomponents", dataset_name)
        return dataset_obj
    except ApiException as exc:
        if exc.status == 404:
            raise ObjectDoesNotExistTemporaryError(
                f"DatasetReference {dataset_name} doesn't exist in namespace {dataset_namespace}")
        raise exc


def _get_s3_box_from_dataset_ref(co_api: CustomObjectsApi, spec, namespace):
    dataset_obj = _get_dataset(co_api, spec, namespace)
    dataset_namespace = dataset_obj['metadata']['namespace']
    dataset_spec = dataset_obj['spec']
    box_name = dataset_spec['box']

    box_obj = _get_s3_box(co_api, box_name, dataset_namespace)
    return box_obj


def _check_if_dataset_exists(core_v1_api: CoreV1Api, co_api: CustomObjectsApi, spec, namespace):
    dataset_ref = spec['datasetReference']
    dataset_cmp_ref = dataset_ref['datasetComponentRef']
    dataset_namespace = dataset_cmp_ref.get('namespace', namespace)

    box_obj = _get_s3_box_from_dataset_ref(co_api, spec, namespace)
    box_spec = box_obj['spec']

    _check_if_bucket_exists(core_v1_api, box_spec, dataset_namespace)


def _check_if_bucket_exists(core_v1_api: CoreV1Api, spec, namespace):
    s3_client = None
    bucket_name = None

    if 's3Storage' in spec:
        s3_storage = spec['s3Storage']
        bucket = s3_storage['bucket']
        bucket_name = bucket['name']
        s3_client = _get_s3_client_using_s3_platform_user_creds(core_v1_api, namespace)
    if 's3DefaultStorage' in spec:
        bucket_name = UNIP_BOXES_S3_DEFAULT_USER_DATA_BUCKET
        if not bucket_name:
            raise InternalErrorPermanentError('Platform default user data bucket not specified')
        s3_client = _get_s3_client_using_controller_creds(core_v1_api)

    if not s3_client.bucket_exists(bucket_name):
        raise ObjectDoesNotExistTemporaryError(f'Bucket {bucket_name} does not exist')


def _check_if_related_objects_exist(core_v1_api: CoreV1Api, co_api: CustomObjectsApi, spec, namespace):
    if 's3Storage' in spec or 's3DefaultStorage' in spec:
        _check_if_bucket_exists(core_v1_api, spec, namespace)
    elif 'datasetReference' in spec:
        _check_if_dataset_exists(core_v1_api, co_api, spec, namespace)
    else:
        raise InputValidationPermanentError('Only s3Storage, s3DefaultStorage, datasetReference are implemented now')


def _get_pv_full_name(platform_app_name, box_name):
    pv_full_name = PV_FULL_NAME_TEMPLATE.format(platform_app_name=platform_app_name,
                                                box_name=box_name,
                                                suffix=CSI_S3_PV_NAME_SUFFIX)
    return pv_full_name


def _get_pvc_name(box_name):
    pvc_name = box_name + CSI_S3_PVC_NAME_SUFFIX
    return pvc_name
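For concreteness, the names these helpers derive for an invented namespace and box:

    print(_get_pv_full_name('pu-team1', 'raw-data'))  # pu-team1-raw-data-csi-s3-pv
    print(_get_pvc_name('raw-data'))                  # raw-data-csi-s3-pvc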


def _check_if_pv_is_set(status):
    if 's3Storage' in status:
        return ('csiS3PersistentVolumeName' in status['s3Storage']) \
            and (status['s3Storage']['csiS3PersistentVolumeName'] is not None)
    return False


def _check_if_pvc_is_set(status):
    if 's3Storage' in status:
        return ('csiS3PersistentVolumeClaimName' in status['s3Storage']) and \
            (status['s3Storage']['csiS3PersistentVolumeClaimName'] is not None)
    return False


def _check_if_pv_does_not_exist(core_v1_api: CoreV1Api, pv_full_name, logger):
    pvs: V1PersistentVolumeList = core_v1_api.list_persistent_volume(
        label_selector=f'{PV_FULL_NAME_LABEL}={pv_full_name}')
    if len(pvs.items) != 0:
        logger.error(f"Persistent volume with label {PV_FULL_NAME_LABEL}={pv_full_name} already exists")
        return False
    return True


def _check_if_pvc_does_not_exist(core_v1_api: CoreV1Api, pvc_name, namespace, logger):
    try:
        core_v1_api.read_namespaced_persistent_volume_claim(pvc_name, namespace)
    except ApiException as exc:
        if exc.status == 404:
            return True
        raise exc
    logger.error(f"PVC {pvc_name} in namespace {namespace} already exists")
    return False


def _get_condition(type_, reason, message):
    return {
        'type': type_,
        'conditionStatus': 'True',
        'lastTransitionTime': datetime.now(timezone.utc).isoformat(),
        'reason': reason,
        'message': message
    }


def _get_failed_condition(reason, message):
    return _get_condition(FAILURE, reason, message)


def _get_waiting_pv_ready_condition():
    return _get_condition(WAITING_PV_READINESS, "", "")


def _get_pv_is_ready_condition():
    return _get_condition(PV_IS_READY, "", "")


def _get_pv_failed_condition():
    return _get_condition(PV_FAILED, "", "")


def _get_waiting_pv_is_deleted_condition():
    return _get_condition(WAITING_PV_IS_DELETED, "", "")


def _get_pv_is_deleted_condition():
    return _get_condition(PV_IS_DELETED, "", "")


def _get_waiting_pvc_ready_condition():
    return _get_condition(WAITING_PVC_READINESS, "", "")


def _get_pvc_is_ready_condition():
    return _get_condition(PVC_IS_READY, "", "")


def _get_pvc_failed_condition():
    return _get_condition(PVC_FAILED, "", "")


def _get_waiting_pvc_is_deleted_condition():
    return _get_condition(WAITING_PVC_IS_DELETED, "", "")


def _get_pvc_is_deleted_condition():
    return _get_condition(PVC_IS_DELETED, "", "")


def _get_box_is_ready_condition():
    return _get_condition(READY, "", "")


def _get_box_failure_condition(reason="", message=""):
    return _get_condition(FAILURE, reason, message)


def _get_box_deleting_condition(reason="", message=""):
    return _get_condition(DELETING, reason, message)


def _make_condition_transition(patch, condition_from_type: Union[Sequence, str], condition_to):
    conditions_patch = patch['status']['conditions']
    conditions_from = [condition_from_type] if isinstance(condition_from_type, str) else condition_from_type
    for condition_from in conditions_from:
        for c in conditions_patch:
            if c['type'] == condition_from and c['conditionStatus'] == 'True':
                c['conditionStatus'] = 'False'
                c['lastTransitionTime'] = datetime.now(timezone.utc).isoformat()
    conditions_patch.append(condition_to)
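An illustrative run of the condition state machine on an in-memory patch (no cluster involved):

    patch = {'status': {'conditions': [_get_waiting_pv_ready_condition()]}}
    _make_condition_transition(patch, WAITING_PV_READINESS, _get_pv_is_ready_condition())
    # The old condition flips to 'False' and PVIsReady is appended:
    print([(c['type'], c['conditionStatus']) for c in patch['status']['conditions']])
    # [('WaitingPVReadiness', 'False'), ('PVIsReady', 'True')]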


def _set_pvc_is_ready(patch):
    _make_condition_transition(patch, WAITING_PVC_READINESS, _get_pvc_is_ready_condition())


def _set_pvc_failed(patch):
    _make_condition_transition(patch, WAITING_PVC_READINESS, _get_pvc_failed_condition())


def _set_pv_is_ready(patch):
    _make_condition_transition(patch, WAITING_PV_READINESS, _get_pv_is_ready_condition())


def _set_pv_failed(patch):
    _make_condition_transition(patch, WAITING_PV_READINESS, _get_pv_failed_condition())


def _set_box_is_ready(patch):
    conditions_patch = patch['status']['conditions']
    conditions_patch.append(_get_box_is_ready_condition())


def _set_box_failure(patch, reason="", message=""):
    _make_condition_transition(patch, READY, _get_box_failure_condition(reason, message))


def _get_and_update_pvc_status(core_v1_api: CoreV1Api, namespace, patch):
    pvc_is_ready = False
    pvc_failed = False
    status = patch['status']
    conditions = status['conditions']
    for c in conditions:
        if c['type'] == PVC_IS_READY and c['conditionStatus'] == 'True':
            pvc_is_ready = True
            break
        if c['type'] == PVC_FAILED and c['conditionStatus'] == 'True':
            pvc_failed = True
            break
        if c['type'] == WAITING_PVC_READINESS and c['conditionStatus'] == 'True':
            pvc_name = status['s3Storage']['csiS3PersistentVolumeClaimName']
            pvc = core_v1_api.read_namespaced_persistent_volume_claim(name=pvc_name, namespace=namespace)
            if pvc.status.phase in {'Available', 'Bound'}:
                _set_pvc_is_ready(patch)
                pvc_is_ready = True
                break
            if pvc.status.phase == 'Failed':
                _set_pvc_failed(patch)
                pvc_failed = True
                break
    return pvc_is_ready, pvc_failed


def _get_and_update_pv_status(core_v1_api: CoreV1Api, patch):
    pv_is_ready = False
    pv_failed = False
    status = patch['status']
    conditions = status['conditions']
    for c in conditions:
        if c['type'] == PV_IS_READY and c['conditionStatus'] == 'True':
            pv_is_ready = True
            break
        if c['type'] == PV_FAILED and c['conditionStatus'] == 'True':
            pv_failed = True
            break
        if c['type'] == WAITING_PV_READINESS and c['conditionStatus'] == 'True':
            pv_name = status['s3Storage']['csiS3PersistentVolumeName']
            pv = core_v1_api.read_persistent_volume(name=pv_name)
            if pv.status.phase in {'Available', 'Bound'}:
                _set_pv_is_ready(patch)
                pv_is_ready = True
                break
            if pv.status.phase == 'Failed':
                _set_pv_failed(patch)
                pv_failed = True
                break
    return pv_is_ready, pv_failed


def _check_box_is_ready(status):
    conditions = status['conditions']
    return any(c['conditionStatus'] == 'True' for c in conditions if c['type'] == READY)


def _check_box_failure(status):
    conditions = status['conditions']
    return any(c['conditionStatus'] == 'True' for c in conditions if c['type'] == FAILURE)


def _check_allocation_and_change_status(core_v1_api: CoreV1Api, namespace, patch, retry_num):
    patch_status = patch['status']

    pvc_is_ready, pvc_failed = _get_and_update_pvc_status(core_v1_api, namespace, patch)
    pv_is_ready, pv_failed = _get_and_update_pv_status(core_v1_api, patch)

    if pvc_is_ready and pv_is_ready:
        _set_box_is_ready(patch)
        return

    if pv_failed or pvc_failed:
        _set_box_failure(patch)
        raise AllocationFailedError

    if _check_box_is_ready(patch_status):
        return
    if _check_box_failure(patch_status):
        return

    if retry_num >= MAX_RETRIES - 1:
        _set_box_failure(patch)
        raise AllocationTimeoutError

    raise WaitingAllocationError(delay=30)


def _create_pv_resource(core_v1_api: CoreV1Api, pv_full_name,
                        manifest, logger):
    pvs: V1PersistentVolumeList = core_v1_api.list_persistent_volume(
        label_selector=f'{PV_FULL_NAME_LABEL}={pv_full_name}')
    if len(pvs.items) != 0:
        logger.info(f"Persistent volume with label {PV_FULL_NAME_LABEL}={pv_full_name} already exists, do nothing")
        return
    core_v1_api.create_persistent_volume(body=manifest)


def _get_s3_storage_attrs(spec):
    s3_storage = spec['s3Storage']
    capacity = s3_storage['capacity']
    bucket = s3_storage['bucket']
    bucket_name = bucket['name']
    if 'subPath' in bucket:
        bucket_mount_sub_path = bucket['subPath']
        bucket_path = bucket_name + '/' + bucket_mount_sub_path if bucket_mount_sub_path[0] != '/' \
            else bucket_name + bucket_mount_sub_path
    else:
        bucket_path = bucket_name
    return capacity, bucket_name, bucket_path


def _get_s3_default_storage_attrs(spec, namespace, box_name):
    s3_storage = spec['s3DefaultStorage']
    capacity = s3_storage['capacity']
    bucket_name = UNIP_BOXES_S3_DEFAULT_USER_DATA_BUCKET
    bucket_path = bucket_name + '/' + namespace + '/' + box_name
    return capacity, bucket_name, bucket_path


def _create_csi_s3_pv(core_v1_api: CoreV1Api, co_api: CustomObjectsApi, namespace, box_name, spec):
    if 's3Storage' in spec:
        capacity, bucket_name, bucket_path = _get_s3_storage_attrs(spec)
        storage_class = namespace + CSI_S3_PLATFORM_USER_STORAGE_CLASS_NAME_SUFFIX
        pv_namespace = namespace
    elif 's3DefaultStorage' in spec:
        capacity, bucket_name, bucket_path = _get_s3_default_storage_attrs(spec, namespace, box_name)
        storage_class = UNIP_BOXES_CSI_S3_STORAGE_CLASS
        pv_namespace = UNIP_CONTROLLER_NAMESPACE
    elif 'datasetReference' in spec:
        dataset_ref = spec['datasetReference']
        dataset_cmp_ref = dataset_ref['datasetComponentRef']
        box_obj = _get_s3_box_from_dataset_ref(co_api, spec, namespace)
        box_spec = box_obj['spec']
        dataset_box_name = box_obj['metadata']['name']
        dataset_box_namespace = dataset_cmp_ref.get('namespace', namespace)
        if 's3Storage' in box_spec:
            capacity, bucket_name, bucket_path = _get_s3_storage_attrs(box_spec)
            storage_class = dataset_box_namespace + CSI_S3_PLATFORM_USER_STORAGE_CLASS_NAME_SUFFIX
            pv_namespace = dataset_box_namespace
        elif 's3DefaultStorage' in box_spec:
            capacity, bucket_name, bucket_path = _get_s3_default_storage_attrs(box_spec, dataset_box_namespace,
                                                                               dataset_box_name)
            storage_class = UNIP_BOXES_CSI_S3_STORAGE_CLASS
            pv_namespace = UNIP_CONTROLLER_NAMESPACE
        else:
            raise InputValidationPermanentError('DatasetReference must reference s3Storage or s3DefaultStorage')
        bucket_path = bucket_path.rstrip('/')
        bucket_path = bucket_path + '/users/' + BOX_OWNER_NAME + '/file_groups'
    else:
        raise InputValidationPermanentError('Only s3Storage, s3DefaultStorage, datasetReference are implemented now')

    pv_full_name = _get_pv_full_name(platform_app_name=namespace, box_name=box_name)
    pv_name = str(uuid.uuid4()) + CSI_S3_PV_NAME_SUFFIX
    pvc_name = _get_pvc_name(box_name=box_name)

    pv_vars = {
        'name': pv_name,
        'namespace': pv_namespace,
        'storage_class': storage_class,
        'capacity': capacity,
        'pvc_namespace': namespace,
        'pvc_name': pvc_name,
        'bucket_name_with_path_inside_bucket': bucket_path,
        'labels': [{'key': PV_FULL_NAME_LABEL, 'value': pv_full_name}]
    }

    pv_manifest = _prepare_manifest('csi-s3-pv.yaml', pv_vars)

    pv = core_v1_api.create_persistent_volume(body=pv_manifest)
    return pv.metadata.uid, pv_name


def _create_csi_s3_pvc(core_v1_api: CoreV1Api, co_api: CustomObjectsApi, namespace, box_name, spec, pv_name):
    pvc_name = _get_pvc_name(box_name=box_name)

    if 's3Storage' in spec:
        s3_storage = spec['s3Storage']
        capacity = s3_storage['capacity']
    elif 's3DefaultStorage' in spec:
        s3_storage = spec['s3DefaultStorage']
        capacity = s3_storage['capacity']
    elif 'datasetReference' in spec:
        box_obj = _get_s3_box_from_dataset_ref(co_api, spec, namespace)
        box_spec = box_obj['spec']
        if 's3Storage' in box_spec:
            s3_storage = box_spec['s3Storage']
            capacity = s3_storage['capacity']
        elif 's3DefaultStorage' in box_spec:
            s3_storage = box_spec['s3DefaultStorage']
            capacity = s3_storage['capacity']
        else:
            raise InputValidationPermanentError('datasetReference must reference s3Storage or s3DefaultStorage')
    else:
        raise InputValidationPermanentError('Only s3Storage, s3DefaultStorage, datasetReference are implemented now')

    pvc_vars = {
        'name': pvc_name,
        'namespace': namespace,
        'pv_name': pv_name,
        'capacity': capacity
    }

    pvc_manifest = _prepare_manifest('csi-s3-pvc.yaml', pvc_vars)

    pvc = core_v1_api.create_namespaced_persistent_volume_claim(namespace=namespace, body=pvc_manifest)
    return pvc.metadata.uid, pvc_name


def _create_csi_s3_pv_and_set_status(patch, core_v1_api: CoreV1Api, co_api: CustomObjectsApi,
                                     name, spec, namespace):
    pv_id, pv_name = _create_csi_s3_pv(core_v1_api, co_api, namespace, name, spec)
    patch['status'].setdefault('s3Storage', {})
    patch['status']['s3Storage']['csiS3PersistentVolumeUID'] = pv_id
    patch['status']['s3Storage']['csiS3PersistentVolumeName'] = pv_name
    condition = _get_waiting_pv_ready_condition()
    patch['status']['conditions'].append(condition)


def _create_csi_s3_pvc_and_set_status(patch, core_v1_api: CoreV1Api, co_api: CustomObjectsApi,
                                      name, spec, pv_name, namespace):
    pvc_id, pvc_name = _create_csi_s3_pvc(core_v1_api, co_api, namespace, name, spec, pv_name)
    patch['status'].setdefault('s3Storage', {})
    patch['status']['s3Storage']['csiS3PersistentVolumeClaimUID'] = pvc_id
    patch['status']['s3Storage']['csiS3PersistentVolumeClaimName'] = pvc_name
    condition = _get_waiting_pvc_ready_condition()
    patch['status']['conditions'].append(condition)


def _create_s3_pv_and_pvc(patch, core_v1_api: CoreV1Api, co_api: CustomObjectsApi,
                          name, spec, status, namespace, logger, retry):
    pv_full_name = _get_pv_full_name(platform_app_name=namespace, box_name=name)
    pvc_name = _get_pvc_name(box_name=name)

    if status:
        if 'conditions' in status:
            patch['status']['conditions'] = status['conditions']
        if 's3Storage' in status:
            patch['status']['s3Storage'] = status['s3Storage']

    patch_status = patch['status']

    if not _check_if_pv_is_set(patch_status):
        pv_does_not_exist = _check_if_pv_does_not_exist(core_v1_api, pv_full_name, logger)
        if not pv_does_not_exist:
            # todo: it would be more robust to store the box uid in a PV annotation and determine
            # ownership by it, because the box status update after PV creation may never happen if
            # kopf fails; hence it is important to link the box and the PV in a single operation,
            # and such an operation is writing the box uid into a PV annotation;
            # todo: another option: 1) record "creation started" in the parent status,
            # 2) create the child resource, 3) confirm the creation (if any step fails,
            # the state can be recovered)
            reason = message = f'PV with given label {pv_full_name} already exists'
            _set_box_failure(patch, reason, message)
            raise PVorPVCAlreadyExists
        _create_csi_s3_pv_and_set_status(patch, core_v1_api, co_api, name, spec, namespace)

    if not _check_if_pvc_is_set(patch_status):
        pvc_does_not_exist = _check_if_pvc_does_not_exist(core_v1_api, pvc_name, namespace, logger)
        if not pvc_does_not_exist:
            reason = message = f'PVC {pvc_name} already exists'
            _set_box_failure(patch, reason, message)
            raise PVorPVCAlreadyExists
        pv_name = patch_status['s3Storage']['csiS3PersistentVolumeName']

        _create_csi_s3_pvc_and_set_status(patch, core_v1_api, co_api, name, spec, pv_name, namespace)

    _check_allocation_and_change_status(core_v1_api, namespace, patch, retry)


def _check_pv_is_ready(conditions):
    return any(c['conditionStatus'] == 'True' for c in conditions if c['type'] in {PV_IS_READY, WAITING_PV_READINESS})


def _check_pvc_is_ready(conditions):
    return any(c['conditionStatus'] == 'True' for c in conditions if c['type'] in {PVC_IS_READY, WAITING_PVC_READINESS})


def _check_waiting_pv_is_deleted(conditions):
    return any(c['conditionStatus'] == 'True' for c in conditions if c['type'] == WAITING_PV_IS_DELETED)


def _check_waiting_pvc_is_deleted(conditions):
    return any(c['conditionStatus'] == 'True' for c in conditions if c['type'] == WAITING_PVC_IS_DELETED)


def _check_pv_is_deleted(conditions):
    return any(c['conditionStatus'] == 'True' for c in conditions if c['type'] == PV_IS_DELETED)


def _check_pvc_is_deleted(conditions):
    return any(c['conditionStatus'] == 'True' for c in conditions if c['type'] == PVC_IS_DELETED)


def _check_box_is_deleting(conditions):
    return any(c['conditionStatus'] == 'True' for c in conditions if c['type'] == DELETING)


def _check_can_delete_pv(core_v1_api: CoreV1Api, status, logger):
    pv_name = status['s3Storage']['csiS3PersistentVolumeName']
    try:
        core_v1_api.read_persistent_volume(pv_name)
    except ApiException as exc:
        if exc.status == 404:
            logger.warning(f"PV {pv_name} doesn't exist, nothing to delete")
            return True
        else:
            raise exc
    # any check could go here, e.g.:
    # if pv.status.phase == 'Failed':
    #     return False
    return True


def _check_can_delete_pvc(core_v1_api: CoreV1Api, namespace, status, logger):
    pvc_name = status['s3Storage']['csiS3PersistentVolumeClaimName']
    try:
        core_v1_api.read_namespaced_persistent_volume_claim(pvc_name, namespace)
    except ApiException as exc:
        if exc.status == 404:
            logger.warning(f"PVC {pvc_name} doesn't exist, nothing to delete")
            return True
        else:
            raise exc
    # any check could go here, e.g.:
    # if pvc.status.phase == 'Failed':
    #     return False
    return True


def _delete_pv(core_v1_api: CoreV1Api, status, logger):
    pv_name = status['s3Storage']['csiS3PersistentVolumeName']
    try:
        core_v1_api.delete_persistent_volume(pv_name)
    except ApiException as exc:
        if exc.status == 404:
            logger.warning(f"PV {pv_name} doesn't exist, nothing to delete")
            return True
        else:
            raise exc
    return True


def _delete_pvc(core_v1_api: CoreV1Api, namespace, status, logger):
    pvc_name = status['s3Storage']['csiS3PersistentVolumeClaimName']
    try:
        core_v1_api.delete_namespaced_persistent_volume_claim(pvc_name, namespace)
    except ApiException as exc:
        if exc.status == 404:
            logger.warning(f"PVC {pvc_name} doesn't exist, nothing to delete")
            return True
        else:
            raise exc
    return True


def _check_pv_resource_is_deleted(core_v1_api: CoreV1Api, status):
    pv_name = status['s3Storage']['csiS3PersistentVolumeName']
    try:
        core_v1_api.read_persistent_volume(pv_name)
    except ApiException as exc:
        if exc.status == 404:
            return True
        else:
            raise exc
    return False


def _check_pvc_resource_is_deleted(core_v1_api: CoreV1Api, namespace, status):
    pvc_name = status['s3Storage']['csiS3PersistentVolumeClaimName']
    try:
        # note: name comes first, then namespace (the original had the arguments swapped)
        core_v1_api.read_namespaced_persistent_volume_claim(pvc_name, namespace)
    except ApiException as exc:
        if exc.status == 404:
            return True
        else:
            raise exc
    return False


def _try_delete_pv_and_update_status(patch, core_v1_api: CoreV1Api, status, logger):
    conditions = patch['status']['conditions']
    if _check_pv_is_ready(conditions):
        if _check_can_delete_pv(core_v1_api, status, logger):
            _delete_pv(core_v1_api, status, logger)
            _make_condition_transition(patch, [WAITING_PV_READINESS, PV_IS_READY],
                                       _get_waiting_pv_is_deleted_condition())
        else:
            return False
    if _check_waiting_pv_is_deleted(conditions):
        if _check_pv_resource_is_deleted(core_v1_api, status):
            _make_condition_transition(patch, WAITING_PV_IS_DELETED, _get_pv_is_deleted_condition())
        else:
            return False
    if _check_pv_is_deleted(conditions):
        return True
    return False


def _try_delete_pvc_and_update_status(patch, core_v1_api: CoreV1Api, namespace, status, logger):
    conditions = patch['status']['conditions']
    if _check_pvc_is_ready(conditions):
        if _check_can_delete_pvc(core_v1_api, namespace, status, logger):
            _delete_pvc(core_v1_api, namespace, status, logger)
            _make_condition_transition(patch, [WAITING_PVC_READINESS, PVC_IS_READY],
                                       _get_waiting_pvc_is_deleted_condition())
        else:
            return False
    if _check_waiting_pvc_is_deleted(conditions):
        if _check_pvc_resource_is_deleted(core_v1_api, namespace, status):
            _make_condition_transition(patch, WAITING_PVC_IS_DELETED, _get_pvc_is_deleted_condition())
        else:
            return False
    if _check_pvc_is_deleted(conditions):
        return True
    return False


def _check_can_finish_deletion(patch):
    conditions = patch['status']['conditions']
    if _check_pv_is_deleted(conditions) and _check_pvc_is_deleted(conditions):
        return
    raise WaitingDeletionError(delay=10)


def _update_box_deletion_status(patch):
    status = patch['status']
    if _check_box_is_ready(status) or _check_box_failure(status):
        condition = _get_box_deleting_condition()
        _make_condition_transition(patch, [READY, FAILURE], condition)


def _delete_s3_pv_and_pvc(patch, core_v1_api: CoreV1Api, status, namespace, logger):
    if not _check_if_pv_is_set(status):
        reason = message = 'PV is not set, but expected'
        _set_box_failure(patch, reason, message)
        raise InternalErrorPermanentError(message)

    if not _check_if_pvc_is_set(status):
        reason = message = 'PVC is not set, but expected'
        _set_box_failure(patch, reason, message)
        raise InternalErrorPermanentError(message)

    if 'status' not in patch:
        patch['status'] = {}

    patch['status'].setdefault('conditions', [])
    if status:
        if 'conditions' in status:
            patch['status']['conditions'] = status['conditions']

    _update_box_deletion_status(patch)

    pv_deleted = _try_delete_pv_and_update_status(patch, core_v1_api, status, logger)
    pvc_deleted = _try_delete_pvc_and_update_status(patch, core_v1_api, namespace, status, logger)
    if not pv_deleted or not pvc_deleted:
        raise WaitingDeletionError(delay=10)

    _check_can_finish_deletion(patch)


@kopf.on.create('unified-platform.cs.hse.ru', 'databoxes', retries=7,
                field='spec.s3Storage', value=kopf.PRESENT, when=not_dev_namespace)
@kopf.on.create('unified-platform.cs.hse.ru', 'databoxes', retries=7,
                field='spec.s3DefaultStorage', value=kopf.PRESENT, when=not_dev_namespace)
@kopf.on.create('unified-platform.cs.hse.ru', 'databoxes', retries=7,
                field='spec.datasetReference', value=kopf.PRESENT, when=not_dev_namespace)
def create_s3_box(name, namespace, spec, status, patch, logger, retry, **_):
    if 'status' not in patch:
        patch['status'] = {}

    # patch['status'] is pre-filled with the attributes read and modified from status, so that all
    # required operations can be performed in a single handler pass; otherwise we would have to either
    # 1) track that some attributes live in patch (changed in the current iteration) while others live
    # in status, or 2) spend extra iterations until all attributes are carried over into status.
    # Besides that, patch['status']['conditions'] must be filled from status['conditions'] in any case,
    # because kopf cannot merge lists (which happens when the patch is applied), while, according to
    # the Kubernetes API Conventions, status.conditions must be a list;
    patch['status'].setdefault('conditions', [])
    patch['status'].setdefault('s3Storage', {})
    # for a dataset it would be more correct to keep these attributes under 'datasetReference',
    # not 's3Storage'

    # the code above is lifted out of _create_s3_pv_and_pvc because it must run even if
    # _check_box_has_valid_kind or _check_if_bucket_exists fails

    _check_box_has_valid_kind(spec)
    core_v1_api = CoreV1Api(api_client)
    co_api = CustomObjectsApi(api_client)
    _check_if_related_objects_exist(core_v1_api, co_api, spec, namespace)
    _create_s3_pv_and_pvc(patch, core_v1_api, co_api, name, spec, status, namespace, logger, retry)


@kopf.on.delete('unified-platform.cs.hse.ru', 'databoxes', retries=7, field='spec.s3Storage',
                value=kopf.PRESENT, when=not_dev_namespace)
@kopf.on.delete('unified-platform.cs.hse.ru', 'databoxes', retries=7, field='spec.s3DefaultStorage',
                value=kopf.PRESENT, when=not_dev_namespace)
@kopf.on.delete('unified-platform.cs.hse.ru', 'databoxes', retries=7, field='spec.datasetReference',
                value=kopf.PRESENT, when=not_dev_namespace)
def delete_s3_box(namespace, status, patch, logger, **_):
    core_v1_api = CoreV1Api(api_client)
    _delete_s3_pv_and_pvc(patch, core_v1_api, status, namespace, logger)
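For orientation, a hedged example of a spec that passes _check_box_has_valid_kind and takes the s3Storage path; the field values are invented and the CRD schema itself is not part of this excerpt.

    sample_spec = {
        's3Storage': {
            'capacity': '10Gi',
            'bucket': {'name': 'team-bucket', 'subPath': 'raw'},
        }
    }
    _check_box_has_valid_kind(sample_spec)      # passes silently
    print(_get_s3_storage_attrs(sample_spec))   # ('10Gi', 'team-bucket', 'team-bucket/raw')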
18 controller/src/box/jinja.py (Normal file)
@@ -0,0 +1,18 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: DataBox
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
"""
Box object jinja module.
"""

from jinja2 import Environment, FileSystemLoader

jinja_env = Environment(
    loader=FileSystemLoader('templates/box'),
    lstrip_blocks=True,
    trim_blocks=True
)
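A tiny, assumed render sketch; the variable names mirror pvc_vars in box/handlers.py, but the template files themselves are not shown in this excerpt.

    import yaml
    pvc_manifest = yaml.safe_load(jinja_env.get_template('csi-s3-pvc.yaml').render({
        'name': 'demo-csi-s3-pvc', 'namespace': 'pu-team1',
        'pv_name': 'demo-pv', 'capacity': '1Gi',
    }))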
0 controller/src/cmplink/__init__.py (Normal file)
174 controller/src/cmplink/basic_auth_link.py (Normal file)
@@ -0,0 +1,174 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: ComponentLink
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
import base64
import logging
from typing import Mapping

from kubernetes.client import ApiClient, CustomObjectsApi, ApiException

from auth import create_htpasswd_password
from basic_resources.secrets import prepare_secret_manifest, read_secret_if_exists, \
    create_or_update_secret, read_basic_auth_credentials_from_secret
from cmplink.link import get_links_given_predicate, get_target_resource_if_exists, get_secret_name_from_link, \
    does_target_specified_in_link, TARGET_KIND_TO_CODE

logger = logging.getLogger(__name__)


def _append_credentials_from_secret(creds_dict, secret, secret_name, secret_namespace, logger):
    users_passwords = read_basic_auth_credentials_from_secret(secret, secret_name, secret_namespace, logger)
    for user, password in users_passwords:
        if user in creds_dict:
            if creds_dict[user] != password:
                logger.warning(f'user password from secret {secret_namespace}.{secret_name} '
                               f'may differ from password of same user in another used secret')
        creds_dict[user] = password


def _get_basic_auth_links_given_target(api_client: ApiClient, target, target_namespace,
                                       target_kind, user_namespace):
    def predicate(link):
        link_spec = link['spec']
        if 'basicAuth' not in link_spec:
            return False

        target_specified = does_target_specified_in_link(target, target_namespace, target_kind, link_spec)
        return target_specified

    filtered_list = get_links_given_predicate(api_client, user_namespace, predicate)
    return filtered_list


def _build_credentials(api_client: ApiClient, target, target_namespace, target_kind, user_namespace, logger,
                       exclude_links_names):
    links_list = _get_basic_auth_links_given_target(api_client, target, target_namespace, target_kind, user_namespace)

    creds_dict = {}

    for link in links_list:
        link_metadata = link['metadata']
        link_name = link_metadata['name']
        if link_name in exclude_links_names:
            continue

        secret_name = get_secret_name_from_link(link)
        if not secret_name:
            continue

        secret = read_secret_if_exists(api_client, secret_name, user_namespace)
        if not secret:
            logger.info(f'Target link secret {user_namespace}.{secret_name} '
                        f'does not exist')
            continue

        _append_credentials_from_secret(creds_dict, secret, secret_name, user_namespace, logger)

    return [{'user': k, 'password': v} for k, v in creds_dict.items()]


def get_basic_auth_joint_secret_name(target, target_kind=None):
    secret_name = target + '-ba-joint-cred' if not target_kind \
        else f'{target}-{TARGET_KIND_TO_CODE[target_kind]}-ba-joint-cred'
    return secret_name


def _creds_records_to_htpasswd_strings(records: list[Mapping]) -> list[str]:
    htpasswd_lines = []
    for record in records:
        user = record['user']
        password = record['password']
        encrypted_password = create_htpasswd_password(password)
        htpasswd_record = f'{user}:{encrypted_password}'
        htpasswd_lines.append(htpasswd_record)
    return htpasswd_lines
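A hedged illustration of the record shape this produces; create_htpasswd_password lives elsewhere in the repository, so the hash below is a placeholder, not real output.

    records = [{'user': 'alice', 'password': 's3cr3t'}]
    # _creds_records_to_htpasswd_strings(records) yields one 'user:hash' line per record,
    # e.g. ['alice:$apr1$<placeholder-hash>']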


def _get_basic_auth_secret_from_direct_spec(api_client: ApiClient, target, target_namespace, target_kind):
    api_obj = get_target_resource_if_exists(api_client, target, target_namespace, target_kind, logger)
    if not api_obj:
        return None

    api_spec = api_obj['spec']
    restful_api = api_spec['restfulApi']
    auth = restful_api['auth']
    basic = auth.get('basic')
    secret_name = basic.get('credentials') if basic else None
    return secret_name


def _append_creds_from_direct_spec(api_client: ApiClient, htpasswd_lines, target, target_namespace, target_kind):
    secret_name = _get_basic_auth_secret_from_direct_spec(api_client, target, target_namespace, target_kind)
    if not secret_name:
        return

    secret_obj = read_secret_if_exists(api_client, secret_name, target_namespace)
    if not secret_obj:
        logger.info(f'Application {target_namespace} {target} (target_kind = {target_kind}) secret'
                    f' {secret_name} specified directly does not exist')
        return

    existing_users = set(line[:line.find(':')] for line in htpasswd_lines if ':' in line)

    secret_data = secret_obj.to_dict()['data']

    secret_creds = secret_data.get('auth')
    if not secret_creds:
        logger.warning(f'auth field in secret {target_namespace}.{secret_name} '
                       f'has empty value or does not exist')
        return

    secret_creds_decoded = base64.b64decode(secret_creds).decode('utf-8')
    for cred in secret_creds_decoded.split('\n'):
        if not cred.strip():
            logger.warning(f'credentials record in secret {target_namespace}.{secret_name} '
                           f'is empty')
            continue

        if ':' not in cred:
            logger.warning(f'wrong credentials format in secret {target_namespace}.{secret_name}, '
                           f'some credentials may be omitted')
            continue

        sep_index = cred.find(':')
        user, password = cred[:sep_index], cred[sep_index + 1:]
        if user in existing_users:
            logger.warning(f'user {user} from secret {target_namespace}.{secret_name} '
                           f'is already present in links credentials')
            continue

        htpasswd_lines.append(cred)


def create_or_update_basic_auth_joint_secret(api_client: ApiClient, target, target_namespace, user_namespace, logger,
                                             exclude_links_names=None, exclude_direct_spec=False, target_kind=None):
    # exclude_links_names is needed because the link deletion handler runs before the actual
    # deletion, so the list of links read back will still contain the link being deleted
    if exclude_links_names is None:
        exclude_links_names = []
    creds = _build_credentials(api_client, target, target_namespace, target_kind,
                               user_namespace, logger, exclude_links_names)
    htpasswd_lines = _creds_records_to_htpasswd_strings(creds)
    if not exclude_direct_spec:
        # adding explicitly specified secrets may be dropped in the future
        _append_creds_from_direct_spec(api_client, htpasswd_lines, target, target_namespace, target_kind)
    htpasswd_string = '"' + '\\n'.join(htpasswd_lines) + '\\n"'
    basic_auth_data = []
    if htpasswd_string:
        basic_auth_data.append({'key': 'auth', 'value': htpasswd_string})
    secret_name = get_basic_auth_joint_secret_name(target, target_kind)

    manifest = prepare_secret_manifest(name=secret_name,
                                       namespace=target_namespace,
                                       type_='Opaque',
                                       data=basic_auth_data,
                                       data_attr='stringData')

    create_or_update_secret(api_client=api_client,
                            name=secret_name,
                            namespace=target_namespace,
                            manifest=manifest,
                            logger=logger)
713 controller/src/cmplink/handlers.py (Normal file)
@@ -0,0 +1,713 @@
# ============================================================
|
||||
# Система: Единая библиотека, Центр ИИ НИУ ВШЭ
|
||||
# Модуль: ComponentLink
|
||||
# Авторы: Полежаев В.А., Хританков А.С.
|
||||
# Дата создания: 2025 г.
|
||||
# ============================================================
|
||||
import kopf
|
||||
from kubernetes.client import CustomObjectsApi, ApiException
|
||||
|
||||
from basic_resources.secrets import set_secret_annotations_and_labels, delete_secret_if_exists, copy_secret_silently, \
|
||||
copy_secret_data
|
||||
from bound import not_dev_namespace, in_pu_namespace
|
||||
from cmplink.basic_auth_link import create_or_update_basic_auth_joint_secret
|
||||
from cmplink.keycloak_group_link import get_accounts_from_secret, \
|
||||
create_or_update_groups_joint_secret_and_restart_proxy
|
||||
from cmplink.keycloak_groups import set_group_groups_chain, delete_group, \
|
||||
set_group_accounts
|
||||
from cmplink.link import get_secret_name_from_link, get_links_given_predicate, get_links_if_exist, \
|
||||
unset_secret_anno_and_label, get_secret_name_if_set, TARGET_KIND_API_COMPONENT
|
||||
from config import OIDC_MODIFY
|
||||
from kopf_k8s_client import api_client
|
||||
|
||||
_KEYCLOAK_GROUP_ANNO_PREFIX = 'keycloakgroup.componentlink.unified-platform.cs.hse.ru'
|
||||
_KEYCLOAK_GROUP_LABEL = 'keycloakgroup.componentlink.unified-platform.cs.hse.ru'
|
||||
_BASIC_AUTH_ANNO_PREFIX = 'basicauth.componentlink.unified-platform.cs.hse.ru'
|
||||
_BASIC_AUTH_LABEL = 'basicauth.componentlink.unified-platform.cs.hse.ru'
|
||||
_CONFIG_ANNO_PREFIX = 'config.componentlink.unified-platform.cs.hse.ru'
|
||||
_CONFIG_LABEL = 'config.componentlink.unified-platform.cs.hse.ru'
|
||||
|
||||
|
||||
def is_keycloak_group_link(spec, **_):
    return 'keycloakGroup' in spec


def is_basic_auth_link(spec, **_):
    return 'basicAuth' in spec


def is_config_link(spec, **_):
    return 'config' in spec


def _set_group_ids_in_status_by_patch(patch, group_ids):
    if 'status' not in patch:
        patch['status'] = {}

    patch['status'].setdefault('keycloakGroup', {})
    status_kg = patch['status']['keycloakGroup']
    status_kg['groupIds'] = group_ids


def _set_group_ids_in_status_by_client(kg_link_name, namespace, group_ids, logger):
    # groupIds explicitly associate a KeycloakGroup ComponentLink (KGL) with a Keycloak group;
    # this may become useful when a group is moved, if KGL hierarchy support is ever added;
    # until then, the groupIds stored in status could be replaced by looking the group up by its full path;
    co_api = CustomObjectsApi(api_client)
    patch = {'status': {'keycloakGroup': {'groupIds': group_ids}}}
    try:
        co_api.patch_namespaced_custom_object_status('unified-platform.cs.hse.ru', 'v1',
                                                     namespace, 'componentlinks', kg_link_name, patch)
    except ApiException as exc:
        if exc.status == 404:
            logger.warning(f'KeycloakGroup ComponentLink {namespace}.{kg_link_name} '
                           f'does not exist. Cannot set groupIds')
            return
        raise exc
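
# Illustrative example: the status patch above produces a subtree such as
# (realm names and ids here are hypothetical):
#   status:
#     keycloakGroup:
#       groupIds: {'end-users': '5f3c...', 'robots': '9a71...'}
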
def _unset_keycloak_group_secret_anno_and_label(link_name, secret_name, namespace, logger):
    unset_secret_anno_and_label(api_client,
                                _KEYCLOAK_GROUP_ANNO_PREFIX,
                                _KEYCLOAK_GROUP_LABEL,
                                link_name,
                                secret_name,
                                namespace,
                                logger)


def _unset_basic_auth_secret_anno_and_label(link_name, secret_name, namespace, logger):
    unset_secret_anno_and_label(api_client,
                                _BASIC_AUTH_ANNO_PREFIX,
                                _BASIC_AUTH_LABEL,
                                link_name,
                                secret_name,
                                namespace,
                                logger)


def _unset_config_secret_anno_and_label(link_name, secret_name, namespace, logger):
    unset_secret_anno_and_label(api_client,
                                _CONFIG_ANNO_PREFIX,
                                _CONFIG_LABEL,
                                link_name,
                                secret_name,
                                namespace,
                                logger)


def _set_keycloak_group_secret_anno_and_label(link_name, secret_name, namespace, logger):
    anno = {f'{_KEYCLOAK_GROUP_ANNO_PREFIX}/{link_name}': 'true'}
    labels = {_KEYCLOAK_GROUP_LABEL: 'true'}
    set_secret_annotations_and_labels(api_client, secret_name, namespace, anno, labels, logger)


def _set_basic_auth_secret_anno_and_label(link_name, secret_name, namespace, logger):
    anno = {f'{_BASIC_AUTH_ANNO_PREFIX}/{link_name}': 'true'}
    labels = {_BASIC_AUTH_LABEL: 'true'}
    set_secret_annotations_and_labels(api_client, secret_name, namespace, anno, labels, logger)


def _set_many_basic_auth_secrets_anno_and_label(ba_links, secret_name, namespace, logger):
    for ba_link in ba_links:
        link_name = ba_link['metadata']['name']
        anno = {f'{_BASIC_AUTH_ANNO_PREFIX}/{link_name}': 'true'}
        labels = {_BASIC_AUTH_LABEL: 'true'}
        set_secret_annotations_and_labels(api_client, secret_name, namespace, anno, labels, logger)


def _set_config_secret_anno_and_label(link_name, secret_name, namespace, logger):
    anno = {f'{_CONFIG_ANNO_PREFIX}/{link_name}': 'true'}
    labels = {_CONFIG_LABEL: 'true'}
    set_secret_annotations_and_labels(api_client, secret_name, namespace, anno, labels, logger)


def _get_keycloak_user_registry_name(spec):
    keycloak_group = spec['keycloakGroup']
    user_registry_ref = keycloak_group['userRegistryRef']
    kg_user_registry = user_registry_ref['name']
    return kg_user_registry


def _set_group_and_accounts(name, user_namespace, spec, logger):
    keycloak_group_name = name
    user_registry_name = _get_keycloak_user_registry_name(spec)
    secret_name = get_secret_name_if_set(spec)
    group_ids = set_group_groups_chain(user_namespace, keycloak_group_name, logger, OIDC_MODIFY)
    _set_group_ids_in_status_by_client(name, user_namespace, group_ids, logger)
    if secret_name:
        # todo: the secret is read here, but when _set_group_and_accounts is called from a secret
        # handler, re-reading it may be redundant; two variants of _set_group_and_accounts could be
        # introduced, using set_group_id_in_status_by_patch instead of set_group_id_in_status_by_client
        # when _set_group_and_accounts is called from a KeycloakGroup ComponentLink handler
        # (to avoid an extra patch call)
        usernames, emails, robots = get_accounts_from_secret(api_client, secret_name, user_namespace,
                                                             user_registry_name, logger)
        set_group_accounts(group_ids, usernames, emails, robots, OIDC_MODIFY, logger)
    else:
        set_group_accounts(group_ids, None, None, None, OIDC_MODIFY, logger)


def _unset_group_accounts(group_ids, logger):
    set_group_accounts(group_ids, None, None, None, OIDC_MODIFY, logger)


def _set_keycloak_group_link_targets_joint_secrets(spec, namespace, exclude_links_names, logger):
    app = spec['platformAppName']
    targets = spec['targets']
    for target_spec in targets:
        target = target_spec['name']
        target_namespace = target_spec['platformAppName'] if 'platformAppName' in target_spec else app
        target_kind = target_spec.get('kind')
        create_or_update_groups_joint_secret_and_restart_proxy(api_client=api_client,
                                                               target=target,
                                                               target_namespace=target_namespace,
                                                               target_kind=target_kind,
                                                               user_namespace=namespace,
                                                               logger=logger,
                                                               exclude_links_names=exclude_links_names)


def _set_basic_auth_link_targets_joint_secrets(spec, namespace, exclude_links_names, logger):
    app = spec['platformAppName']
    targets = spec['targets']
    for target_spec in targets:
        target = target_spec['name']
        target_namespace = target_spec['platformAppName'] if 'platformAppName' in target_spec else app
        target_kind = target_spec.get('kind')
        create_or_update_basic_auth_joint_secret(api_client=api_client,
                                                 target=target,
                                                 target_namespace=target_namespace,
                                                 target_kind=target_kind,
                                                 user_namespace=namespace,
                                                 logger=logger,
                                                 exclude_links_names=exclude_links_names)


def _set_many_basic_auth_links_targets_joint_secrets(ba_links, namespace, exclude_links, logger):

    all_targets = set()

    def get_target_kind(_target_spec):
        _target_kind = _target_spec.get('kind')
        if _target_kind == TARGET_KIND_API_COMPONENT:
            _target_kind = None
        return _target_kind

    for ba_link in ba_links:
        link_spec = ba_link['spec']
        link_app = link_spec['platformAppName']
        link_targets = link_spec['targets']
        targets = {(t['name'],
                    t['platformAppName'] if 'platformAppName' in t else link_app,
                    get_target_kind(t)) for t in link_targets}
        all_targets = all_targets | targets

    exclude_links_names = [ba_link['metadata']['name'] for ba_link in exclude_links] if exclude_links else None

    for target, target_namespace, target_kind in all_targets:
        create_or_update_basic_auth_joint_secret(api_client=api_client,
                                                 target=target,
                                                 target_namespace=target_namespace,
                                                 target_kind=target_kind,
                                                 user_namespace=namespace,
                                                 logger=logger,
                                                 exclude_links_names=exclude_links_names)
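
# Illustrative note: collecting (name, namespace, kind) tuples into a set above
# deduplicates targets shared by several links, so each joint secret is rebuilt
# once per target rather than once per link.
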
def _set_config_link_secret(spec, name, namespace, logger):
    dest_namespace = spec['platformAppName']

    secret_name = get_secret_name_if_set(spec)
    if secret_name:
        copy_secret_silently(api_client=api_client,
                             source_secret_name=secret_name,
                             source_namespace=namespace,
                             dest_secret_name=name,
                             dest_namespace=dest_namespace,
                             logger=logger)


def _set_config_link_secret_from_secret(spec, name, source_secret_body, logger):
    dest_namespace = spec['platformAppName']

    copy_secret_data(api_client=api_client,
                     source_secret_body=source_secret_body,
                     dest_secret_name=name,
                     dest_namespace=dest_namespace,
                     logger=logger)


def _delete_config_link_secret(spec, name, logger):
    secret_namespace = spec['platformAppName']
    delete_secret_if_exists(api_client, name, secret_namespace, logger)


@kopf.on.create('unified-platform.cs.hse.ru', 'componentlinks',
                when=kopf.all_([not_dev_namespace,
                                in_pu_namespace,
                                is_keycloak_group_link]))
def create_keycloak_group_link(spec, name, namespace, body, patch, logger, **_):
    logger.debug(f"A create_keycloak_group_link handler is called with body: {body}")

    @kopf.subhandler(id=f'set-group-and-accounts-{namespace}.{name}')
    def _set_group_and_accounts_handler(**_):
        _set_group_and_accounts(name, namespace, spec, logger)

    @kopf.subhandler(id=f'update-keycloak-group-secret-{namespace}.{name}')
    def _set_keycloak_group_secret_anno_and_label_handler(**_):
        secret_name = get_secret_name_if_set(spec)
        if secret_name:
            _set_keycloak_group_secret_anno_and_label(name, secret_name, namespace, logger)

    @kopf.subhandler(id=f'set-targets-joint-secrets-{namespace}.{name}')
    def _create_or_update_joint_secrets_handler(**_):
        _set_keycloak_group_link_targets_joint_secrets(spec, namespace, None, logger)


@kopf.on.delete('unified-platform.cs.hse.ru', 'componentlinks',
                when=kopf.all_([not_dev_namespace,
                                in_pu_namespace,
                                is_keycloak_group_link]))
def delete_keycloak_group_link(spec, name, namespace, status, body, logger, **_):
    logger.debug(f"A delete_keycloak_group_link handler is called with body: {body}")

    group_ids = status.get('keycloakGroup', {}).get('groupIds')
    if group_ids:
        delete_group(group_ids, logger, OIDC_MODIFY)

    secret_name = get_secret_name_if_set(spec)
    if secret_name:
        _unset_keycloak_group_secret_anno_and_label(name, secret_name, namespace, logger)

    _set_keycloak_group_link_targets_joint_secrets(spec, namespace, {name}, logger)


def _secret_is_changed(diff, **_):
    secret_ref_changed = any(d[1] == ('spec', 'secretRef') for d in diff)
    secret_name_changed = any(d[1] == ('spec', 'secretRef', 'name') for d in diff)
    return secret_ref_changed or secret_name_changed


def _secret_is_not_changed(diff, **_):
    return not _secret_is_changed(diff)


def _app_name_or_targets_are_changed(diff, **_):
    targets_changed = any(d[1] == ('spec', 'targets') for d in diff)
    app_changed = any(d[1] == ('spec', 'platformAppName') for d in diff)
    return targets_changed or app_changed


def _app_name_is_changed(diff, **_):
    app_changed = any(d[1] == ('spec', 'platformAppName') for d in diff)
    return app_changed
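
# Illustrative example: kopf passes `diff` as a sequence of (op, field, old, new)
# tuples, so a secretRef rename arrives as something like
#   ('change', ('spec', 'secretRef', 'name'), 'old-secret', 'new-secret')
# which is what the field-path comparisons above match against.
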
@kopf.on.update('unified-platform.cs.hse.ru', 'componentlinks',
                when=kopf.all_([not_dev_namespace,
                                in_pu_namespace,
                                is_keycloak_group_link,
                                _secret_is_changed]))
def update_keycloak_group_link(old, new, status, name, namespace, body, patch, logger, **_):
    logger.debug(f"A update_keycloak_group_link handler is called with body: {body}")

    old_spec = old['spec']
    new_spec = new['spec']

    secret_name = get_secret_name_if_set(new_spec)
    old_secret_name = get_secret_name_if_set(old_spec)

    # the _secret_is_changed filter is in place, so the Secret has changed - the group
    # membership must be updated and the label set;
    # set_group and update_secrets are separate subhandlers because they can run independently of each other;

    @kopf.subhandler(id=f'set-group-and-accounts-{namespace}.{name}')
    def _set_group_and_accounts_handler(**_):
        _set_group_and_accounts(name, namespace, new_spec, logger)

    @kopf.subhandler(id=f'update-keycloak-group-secrets-{namespace}.{name}')
    def _update_keycloak_group_secrets_anno_and_label_handler(**_):
        if old_secret_name is not None:
            _unset_keycloak_group_secret_anno_and_label(name, old_secret_name, namespace, logger)
        if secret_name:
            _set_keycloak_group_secret_anno_and_label(name, secret_name, namespace, logger)


@kopf.on.update('unified-platform.cs.hse.ru', 'componentlinks',
                when=kopf.all_([not_dev_namespace,
                                in_pu_namespace,
                                is_keycloak_group_link,
                                _app_name_or_targets_are_changed]))
def update_keycloak_group_link_targets(old, new, status, name, namespace, body, patch, logger, **_):
    old_spec = old['spec']
    new_spec = new['spec']

    _set_keycloak_group_link_targets_joint_secrets(old_spec, namespace, None, logger)
    _set_keycloak_group_link_targets_joint_secrets(new_spec, namespace, None, logger)


@kopf.on.create('unified-platform.cs.hse.ru', 'componentlinks',
                when=kopf.all_([not_dev_namespace,
                                in_pu_namespace,
                                is_basic_auth_link]))
def create_basic_auth_link(spec, name, namespace, body, patch, logger, **_):
    logger.debug(f"A create_basic_auth_link handler is called with body: {body}")

    @kopf.subhandler(id=f'update-basic-auth-secret-{namespace}.{name}')
    def _set_basic_auth_secret_anno_and_label_handler(**_):
        secret_name = get_secret_name_if_set(spec)
        if secret_name:
            _set_basic_auth_secret_anno_and_label(name, secret_name, namespace, logger)

    @kopf.subhandler(id=f'set-targets-joint-secrets-{namespace}.{name}')
    def _create_or_update_joint_secrets_handler(**_):
        _set_basic_auth_link_targets_joint_secrets(spec, namespace, None, logger)


@kopf.on.delete('unified-platform.cs.hse.ru', 'componentlinks',
                when=kopf.all_([not_dev_namespace,
                                in_pu_namespace,
                                is_basic_auth_link]))
def delete_basic_auth_link(spec, name, namespace, status, body, logger, **_):
    logger.debug(f"A delete_basic_auth_link handler is called with body: {body}")

    secret_name = get_secret_name_if_set(spec)
    if secret_name:
        _unset_basic_auth_secret_anno_and_label(name, secret_name, namespace, logger)

    _set_basic_auth_link_targets_joint_secrets(spec, namespace, {name}, logger)


@kopf.on.update('unified-platform.cs.hse.ru', 'componentlinks',
                when=kopf.all_([not_dev_namespace,
                                in_pu_namespace,
                                is_basic_auth_link,
                                kopf.any_([_secret_is_changed, _app_name_or_targets_are_changed])
                                ]))
def update_basic_auth_link(old, new, status, name, namespace, body, patch, logger, **_):
    logger.debug(f"A update_basic_auth_link handler is called with body: {body}")

    old_spec = old['spec']
    new_spec = new['spec']

    secret_name = get_secret_name_if_set(new_spec)
    old_secret_name = get_secret_name_if_set(old_spec)

    # separate subhandlers, because they can run independently of each other
    @kopf.subhandler(id=f'update-basic-auth-secrets-{namespace}.{name}')
    def _update_basic_auth_secrets_anno_and_label_handler(**_):
        if secret_name == old_secret_name:
            return
        if old_secret_name is not None:
            _unset_basic_auth_secret_anno_and_label(name, old_secret_name, namespace, logger)
        if secret_name:
            _set_basic_auth_secret_anno_and_label(name, secret_name, namespace, logger)

    @kopf.subhandler(id=f'set-targets-joint-secrets-{namespace}.{name}')
    def _create_or_update_joint_secrets_handler(**_):
        _set_basic_auth_link_targets_joint_secrets(old_spec, namespace, None, logger)
        _set_basic_auth_link_targets_joint_secrets(new_spec, namespace, None, logger)


@kopf.on.update('unified-platform.cs.hse.ru', 'componentlinks',
                when=kopf.all_([not_dev_namespace,
                                in_pu_namespace,
                                is_basic_auth_link,
                                kopf.any_([_secret_is_not_changed, _app_name_or_targets_are_changed])
                                ]))
def update_basic_auth_link_targets(old, new, status, name, namespace, body, patch, logger, **_):
    logger.debug(f"A update_basic_auth_link_targets handler is called with body: {body}")

    old_spec = old['spec']
    new_spec = new['spec']

    _set_basic_auth_link_targets_joint_secrets(old_spec, namespace, None, logger)
    _set_basic_auth_link_targets_joint_secrets(new_spec, namespace, None, logger)


@kopf.on.create('unified-platform.cs.hse.ru', 'componentlinks',
                when=kopf.all_([not_dev_namespace,
                                in_pu_namespace,
                                is_config_link]),
                field='spec.secretRef',
                value=kopf.PRESENT)
# !!! if value=kopf.PRESENT is not specified in an on.create handler that uses the field parameter,
# the spec argument will receive only the value of the attribute specified in field
def create_config_link_with_secret(spec, name, namespace, body, logger, **_):
    logger.debug(f"A create_config_link_with_secret handler is called with body: {body}")

    @kopf.subhandler(id=f'update-config-secret-{namespace}.{name}')
    def _set_config_secret_anno_and_label_handler(**_):
        secret_name = get_secret_name_if_set(spec)
        _set_config_secret_anno_and_label(name, secret_name, namespace, logger)

    @kopf.subhandler(id=f'set-config-secret-{namespace}.{name}')
    def _copy_secret_handler(**_):
        _set_config_link_secret(spec, name, namespace, logger)
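
# Illustrative sketch of the gotcha noted above (restating the comment, not kopf docs):
#   @kopf.on.create('unified-platform.cs.hse.ru', 'componentlinks', field='spec.secretRef')
#   def handler(spec, **_):
#       ...  # without value=kopf.PRESENT, `spec` here would hold only the secretRef value
# Passing value=kopf.PRESENT keeps the full spec and fires only when spec.secretRef exists.
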
@kopf.on.delete('unified-platform.cs.hse.ru', 'componentlinks',
                when=kopf.all_([not_dev_namespace,
                                in_pu_namespace,
                                is_config_link]),
                field='spec.secretRef',
                value=kopf.PRESENT)
# !!! if value=kopf.PRESENT is not specified in an on.delete handler that uses the field parameter,
# the spec argument will receive only the value of the attribute specified in field
def delete_config_link_with_secret(spec, name, namespace, body, logger, **_):
    logger.debug(f"A delete_config_link_with_secret handler is called with body: {body}")

    secret_name = get_secret_name_if_set(spec)
    _unset_config_secret_anno_and_label(name, secret_name, namespace, logger)
    _delete_config_link_secret(spec, name, logger)


@kopf.on.update('unified-platform.cs.hse.ru', 'componentlinks',
                when=kopf.all_([not_dev_namespace,
                                in_pu_namespace,
                                is_config_link,
                                kopf.any_([_secret_is_changed, _app_name_is_changed])]))
def update_config_link_with_secret(old, new, name, namespace, body, logger, **_):
    logger.debug(f"A update_config_link_with_secret handler is called with body: {body}")

    old_spec = old['spec']
    new_spec = new['spec']

    secret_name = get_secret_name_if_set(new_spec)
    old_secret_name = get_secret_name_if_set(old_spec)
    old_app = old_spec['platformAppName']
    new_app = new_spec['platformAppName']

    @kopf.subhandler(id=f'update-config-secrets-{namespace}.{name}')
    def _update_config_secrets_anno_and_label_handler(**_):
        if secret_name == old_secret_name:
            return
        if old_secret_name is not None:
            _unset_config_secret_anno_and_label(name, old_secret_name, namespace, logger)
        if secret_name:
            _set_config_secret_anno_and_label(name, secret_name, namespace, logger)

    @kopf.subhandler(id=f'set-config-secret-{namespace}.{name}')
    def _copy_secret_handler(**_):
        if old_app != new_app:
            _delete_config_link_secret(old_spec, name, logger)
        _set_config_link_secret(new_spec, name, namespace, logger)


def _get_keycloak_group_links_given_secret(secret_name, secret_namespace):
    def predicate(link):
        link_spec = link['spec']
        if 'keycloakGroup' not in link_spec:
            return False
        link_secret = get_secret_name_from_link(link)
        return secret_name == link_secret

    filtered_list = get_links_given_predicate(api_client, secret_namespace, predicate)
    return filtered_list


def _get_config_links_given_secret(secret_name, secret_namespace):
    def predicate(link):
        link_spec = link['spec']
        if 'config' not in link_spec:
            return False
        link_secret = get_secret_name_from_link(link)
        return secret_name == link_secret

    filtered_list = get_links_given_predicate(api_client, secret_namespace, predicate)
    return filtered_list


def _get_basic_auth_links_given_secret(secret_name, secret_namespace):
    def predicate(link):
        link_spec = link['spec']
        if 'basicAuth' not in link_spec:
            return False
        link_secret = get_secret_name_from_link(link)
        return secret_name == link_secret

    filtered_list = get_links_given_predicate(api_client, secret_namespace, predicate)
    return filtered_list


@kopf.on.create('', 'secrets',
                when=kopf.all_([not_dev_namespace, in_pu_namespace]))
def create_link_secret(spec, name, namespace, body, logger, **_):
    kg_links = _get_keycloak_group_links_given_secret(name, namespace)
    if kg_links:
        for kg_link in kg_links:
            link_spec = kg_link['spec']
            link_name = kg_link['metadata']['name']

            @kopf.subhandler(id=f'set-keycloak-group-group-and-accounts-{namespace}.{link_name}')
            def _set_keycloak_group_and_accounts_handler(**_):
                _set_group_and_accounts(link_name, namespace, link_spec, logger)

            @kopf.subhandler(id=f'update-keycloak-group-secret-{namespace}.{link_name}')
            def _set_keycloak_group_secret_anno_and_label_handler(**_):
                _set_keycloak_group_secret_anno_and_label(link_name, name, namespace, logger)

    ba_links = _get_basic_auth_links_given_secret(name, namespace)
    if ba_links:
        @kopf.subhandler(id=f'update-many-basic-auth-secrets-{namespace}.{name}')
        def _set_basic_auth_group_secret_anno_and_label_handler(**_):
            _set_many_basic_auth_secrets_anno_and_label(ba_links, name, namespace, logger)

        @kopf.subhandler(id=f'set-many-ba-targets-joint-secrets-{namespace}.{name}')
        def _create_or_update_basic_auth_joint_secrets_handler(**_):
            _set_many_basic_auth_links_targets_joint_secrets(ba_links, namespace, None, logger)

    config_links = _get_config_links_given_secret(name, namespace)
    if config_links:
        for config_link in config_links:
            link_spec = config_link['spec']
            link_name = config_link['metadata']['name']

            @kopf.subhandler(id=f'update-config-secret-{namespace}.{link_name}')
            def _set_config_secret_anno_and_label_handler(**_):
                _set_config_secret_anno_and_label(link_name, name, namespace, logger)

            @kopf.subhandler(id=f'set-config-secret-{namespace}.{link_name}')
            def _copy_secret_handler(**_):
                _set_config_link_secret_from_secret(link_spec, link_name, body, logger)


def _get_links_given_secret_metadata(metadata, namespace, anno, link_type, logger):
    if 'annotations' not in metadata:
        return None
    annotations = metadata.get('annotations')
    links_names = [a[len(anno) + 1:]
                   for a in annotations
                   if a.startswith(anno)]
    links = get_links_if_exist(api_client, links_names, namespace, logger)
    filtered = []
    for link in links:
        link_spec = link['spec']
        link_name = link['metadata']['name']
        if link_type in link_spec:
            filtered.append(link)
        else:
            logger.warning(f'ComponentLink "{namespace}.{link_name}" is not a {link_type} link, but one was expected')
    return filtered


def _get_keycloak_group_links_given_secret_metadata(metadata, namespace, logger):
    return _get_links_given_secret_metadata(metadata, namespace, _KEYCLOAK_GROUP_ANNO_PREFIX,
                                            'keycloakGroup', logger)


def _get_basic_auth_links_given_secret_metadata(metadata, namespace, logger):
    return _get_links_given_secret_metadata(metadata, namespace, _BASIC_AUTH_ANNO_PREFIX,
                                            'basicAuth', logger)


def _get_config_links_given_secret_metadata(metadata, namespace, logger):
    return _get_links_given_secret_metadata(metadata, namespace, _CONFIG_ANNO_PREFIX,
                                            'config', logger)


@kopf.on.update('', 'secrets',
                when=kopf.all_([not_dev_namespace, in_pu_namespace]),
                labels={_KEYCLOAK_GROUP_LABEL: 'true'},
                field='data')
def update_keycloak_group_link_secret(spec, name, namespace, body, logger, **_):
    metadata = body['metadata']
    kg_links = _get_keycloak_group_links_given_secret_metadata(metadata, namespace, logger)
    if not kg_links:
        return

    for kg_link in kg_links:
        link_spec = kg_link['spec']
        link_name = kg_link['metadata']['name']

        @kopf.subhandler(id=f'set-keycloak-group-and-accounts-{namespace}.{link_name}')
        def _set_keycloak_group_and_accounts_handler(**_):
            _set_group_and_accounts(link_name, namespace, link_spec, logger)


@kopf.on.update('', 'secrets',
                when=kopf.all_([not_dev_namespace, in_pu_namespace]),
                labels={_BASIC_AUTH_LABEL: 'true'},
                field='data')
@kopf.on.delete('', 'secrets',
                when=kopf.all_([not_dev_namespace, in_pu_namespace]),
                labels={_BASIC_AUTH_LABEL: 'true'},
                param={'event': 'delete'})
def update_or_delete_basic_auth_link_secret(spec, name, namespace, body, param, logger, **_):
    metadata = body['metadata']
    ba_links = _get_basic_auth_links_given_secret_metadata(metadata, namespace, logger)
    if not ba_links:
        return

    # param is only set on the delete decorator; guard against None on updates
    exclude_links = ba_links if param and param.get('event') == 'delete' else None

    @kopf.subhandler(id=f'set-many-ba-targets-joint-secrets-{namespace}.{name}')
    def _create_or_update_basic_auth_joint_secrets_handler(**_):
        _set_many_basic_auth_links_targets_joint_secrets(ba_links, namespace, exclude_links, logger)


@kopf.on.update('', 'secrets',
                when=kopf.all_([not_dev_namespace, in_pu_namespace]),
                labels={_CONFIG_LABEL: 'true'},
                field='data')
def update_config_link_secret(spec, name, namespace, body, logger, **_):
    metadata = body['metadata']
    config_links = _get_config_links_given_secret_metadata(metadata, namespace, logger)
    if not config_links:
        return

    for config_link in config_links:
        link_spec = config_link['spec']
        link_name = config_link['metadata']['name']

        @kopf.subhandler(id=f'set-config-secret-{namespace}.{link_name}')
        def _copy_secret_handler(**_):
            _set_config_link_secret_from_secret(link_spec, link_name, body, logger)


@kopf.on.delete('', 'secrets',
                when=kopf.all_([not_dev_namespace, in_pu_namespace]),
                labels={_KEYCLOAK_GROUP_LABEL: 'true'})
def delete_keycloak_group_link_secret(spec, name, namespace, body, logger, **_):
    metadata = body['metadata']
    kg_links = _get_keycloak_group_links_given_secret_metadata(metadata, namespace, logger)
    if not kg_links:
        return

    for kg_link in kg_links:
        link_status = kg_link.get('status', {})
        link_name = kg_link['metadata']['name']

        @kopf.subhandler(id=f'unset-keycloak-group-accounts-{namespace}.{link_name}')
        def _unset_keycloak_group_accounts_handler(**_):
            group_ids = link_status.get('keycloakGroup', {}).get('groupIds')
            if group_ids:
                _unset_group_accounts(group_ids, logger)


@kopf.on.delete('', 'secrets',
                when=kopf.all_([not_dev_namespace, in_pu_namespace]),
                labels={_CONFIG_LABEL: 'true'})
def delete_config_link_secret(spec, name, namespace, body, logger, **_):
    metadata = body['metadata']
    config_links = _get_config_links_given_secret_metadata(metadata, namespace, logger)
    if not config_links:
        return

    for config_link in config_links:
        link_spec = config_link['spec']
        link_name = config_link['metadata']['name']

        @kopf.subhandler(id=f'delete-config-secret-{namespace}.{link_name}')
        def _delete_config_secret_handler(**_):
            _delete_config_link_secret(link_spec, link_name, logger)


# todo: the config link handlers for Secret make it possible to
# implement analogous config link handlers for ConfigMap
# (literally analogous handlers that do not intersect with Secret in any way;
# possible ConfigMap support is already anticipated in the Secret update handler)

136
controller/src/cmplink/keycloak_group_link.py
Normal file

@ -0,0 +1,136 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: ComponentLink
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2025
# ============================================================
from kubernetes.client import ApiClient

from basic_resources.deployments import restart_deployment_if_exists
from basic_resources.secrets import read_secret_if_exists, prepare_secret_manifest, create_or_update_secret
from cmplink.keycloak_groups import get_accounts_from_secret_attr, get_full_robot_name, get_full_group_path
from cmplink.link import get_links_given_predicate, get_target_resource_if_exists, does_target_specified_in_link, \
    TARGET_KIND_TO_CODE


def get_accounts_from_secret(api_client: ApiClient, secret_name, namespace, user_registry_name, logger):
    secret = read_secret_if_exists(api_client, secret_name, namespace)
    if not secret:
        logger.info(f'KeycloakGroup link secret {namespace}.{secret_name} '
                    f'does not exist')
        return (None,) * 3

    secret_data = secret.to_dict()['data']
    usernames = None
    emails = None
    robots = None
    if 'usernames' in secret_data:
        usernames = get_accounts_from_secret_attr(secret_data, 'usernames')
    if 'emails' in secret_data:
        emails = get_accounts_from_secret_attr(secret_data, 'emails')
    if 'robots' in secret_data:
        robots = get_accounts_from_secret_attr(secret_data, 'robots')
        robots = [get_full_robot_name(namespace, user_registry_name, r) for r in robots]
    result = usernames, emails, robots
    if not any(result):
        logger.warning(f'No accounts found in KeycloakGroup link Secret {namespace}.{secret_name}')
    return result
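
# Illustrative example: the link secret is expected to carry newline-separated,
# base64-encoded account lists, e.g. (decoded, hypothetical names):
#   usernames: "alice\nbob"
#   emails:    "carol@example.org"
#   robots:    "ci-bot"   # expanded above to robot_<namespace>+<registry>+ci-bot
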
def _get_keycloak_group_links_given_target(api_client: ApiClient, target, target_namespace,
                                           target_kind, user_namespace):
    def predicate(link):
        link_spec = link['spec']
        if 'keycloakGroup' not in link_spec:
            return False
        target_specified = does_target_specified_in_link(target, target_namespace, target_kind, link_spec)
        return target_specified

    filtered_list = get_links_given_predicate(api_client, user_namespace, predicate)
    return filtered_list


def _build_groups_names_given_target(api_client: ApiClient, target, target_namespace,
                                     target_kind, user_namespace, exclude_links_names):
    links_list = _get_keycloak_group_links_given_target(api_client, target, target_namespace,
                                                        target_kind, user_namespace)
    links_names = {link['metadata']['name'] for link in links_list}
    links_names = links_names - exclude_links_names
    groups_names = {'/' + get_full_group_path(name, user_namespace) for name in links_names}
    return groups_names


def _get_groups_names_from_direct_spec(api_client: ApiClient, target, target_namespace, target_kind,
                                       user_namespace, logger):
    api_obj = get_target_resource_if_exists(api_client, target, target_namespace, target_kind, logger)
    if not api_obj:
        return set()

    api_spec = api_obj['spec']
    restful_api = api_spec['restfulApi']
    auth = restful_api['auth']
    oidc = auth.get('oidc')
    if not oidc:
        return set()
    groups = oidc.get('groups')
    if not groups:
        return set()

    groups_names = {'/' + get_full_group_path(name, user_namespace) for name in groups if not name.startswith('/')}
    groups_names = groups_names | {name for name in groups if name.startswith('/')}
    return groups_names


def get_groups_joint_secret_name(target_name, target_kind=None):
    secret_name = target_name + '-oidc-joint-cred' if not target_kind \
        else f'{target_name}-{TARGET_KIND_TO_CODE[target_kind]}-oidc-joint-cred'
    return secret_name


def get_oauth2_proxy_deployment_name(target_name, target_kind=None):
    deployment_name = f'{target_name}-oauth2-deployment' if not target_kind \
        else f'{target_name}-{TARGET_KIND_TO_CODE[target_kind]}-oauth2-deployment'
    return deployment_name


def _restart_oauth2_proxy(api_client: ApiClient, target, target_namespace, target_kind, logger):
    deployment_name = get_oauth2_proxy_deployment_name(target, target_kind)
    restart_deployment_if_exists(api_client, deployment_name, target_namespace, logger)


def create_or_update_groups_joint_secret(api_client: ApiClient, target, target_namespace,
                                         user_namespace, logger, exclude_links_names=None,
                                         target_kind=None):
    # exclude_links_names is needed because the link deletion handler runs before the actual deletion -
    # the list of links read from the API will still contain the link being deleted
    if exclude_links_names is None:
        exclude_links_names = set()
    full_groups_names = _build_groups_names_given_target(api_client, target, target_namespace,
                                                         target_kind, user_namespace, exclude_links_names)
    full_groups_names_to_add = _get_groups_names_from_direct_spec(api_client, target, target_namespace,
                                                                  target_kind, user_namespace, logger)
    full_groups_names = list(full_groups_names | full_groups_names_to_add)
    secret_data = [{'key': 'groups', 'value': '"' + '\\n'.join(full_groups_names) + '\\n"'}]

    secret_name = get_groups_joint_secret_name(target, target_kind)

    manifest = prepare_secret_manifest(name=secret_name,
                                       namespace=target_namespace,
                                       type_='Opaque',
                                       data=secret_data,
                                       data_attr='stringData')

    create_or_update_secret(api_client=api_client,
                            name=secret_name,
                            namespace=target_namespace,
                            manifest=manifest,
                            logger=logger)


def create_or_update_groups_joint_secret_and_restart_proxy(api_client: ApiClient, target, target_namespace,
                                                           target_kind, user_namespace, logger,
                                                           exclude_links_names=None):
    create_or_update_groups_joint_secret(api_client, target, target_namespace,
                                         user_namespace, logger, exclude_links_names,
                                         target_kind)
    _restart_oauth2_proxy(api_client, target, target_namespace, target_kind, logger)
612
controller/src/cmplink/keycloak_groups.py
Normal file

@ -0,0 +1,612 @@
# ============================================================
# System: Unified Library, HSE University AI Center
# Module: ComponentLink
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2025
# ============================================================
import base64
import secrets
import string

import requests

from config import OIDC_KEYCLOAK_HOST, OIDC_ROBOTS_REALM, OIDC_ROBOTS_ADMIN_CLIENT_ID, OIDC_ROBOTS_ADMIN_CLIENT_SECRET, \
    OIDC_MANAGED_GROUPS_ROOT, OIDC_CLUSTER_GROUP_NAME, OIDC_END_USERS_REALM, OIDC_END_USERS_ADMIN_CLIENT_ID, \
    OIDC_END_USERS_ADMIN_CLIENT_SECRET, OIDC_ROBOTS_CLIENT_ID
from exceptions import PlatformServiceTemporaryError

_OIDC_GROUPS_URL_TEMPLATE = f'{OIDC_KEYCLOAK_HOST}/admin/realms/{{0}}/groups'
_OIDC_GROUP_BY_PATH_URL_TEMPLATE = f'{OIDC_KEYCLOAK_HOST}/admin/realms/{{0}}/group-by-path'
_OIDC_USERS_URL_TEMPLATE = f'{OIDC_KEYCLOAK_HOST}/admin/realms/{{0}}/users'
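
# Illustrative example: the templates above expand per realm, e.g.
#   _OIDC_GROUPS_URL_TEMPLATE.format('robots')
#   -> f'{OIDC_KEYCLOAK_HOST}/admin/realms/robots/groups'
# ('robots' is a hypothetical realm name here).
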
def auth_keycloak_robots():
    # todo: optimize token issuance and storage - too many tokens are issued at the moment;
    # another possible optimization is to refresh the token only once it has expired
    auth_url = f'{OIDC_KEYCLOAK_HOST}/realms/{OIDC_ROBOTS_REALM}/protocol/openid-connect/token'
    data = {'client_id': OIDC_ROBOTS_ADMIN_CLIENT_ID,
            'client_secret': OIDC_ROBOTS_ADMIN_CLIENT_SECRET,
            'grant_type': 'client_credentials'}
    token = _auth_keycloak(auth_url, data)
    return token


def auth_keycloak_end_users():
    auth_url = f'{OIDC_KEYCLOAK_HOST}/realms/{OIDC_END_USERS_REALM}/protocol/openid-connect/token'
    data = {'client_id': OIDC_END_USERS_ADMIN_CLIENT_ID,
            'client_secret': OIDC_END_USERS_ADMIN_CLIENT_SECRET,
            'grant_type': 'client_credentials'}
    token = _auth_keycloak(auth_url, data)
    return token


def _auth_keycloak(auth_url, data):
    res = requests.post(auth_url, data=data)
    if res.status_code != 200:
        raise PlatformServiceTemporaryError(f'Cannot authenticate. API response text: {res.text}')
    res_data = res.json()
    return res_data['access_token']


def get_full_robot_name_prefix(user_namespace, user_registry_name):
    return f'robot_{user_namespace}+{user_registry_name}'


def get_full_robot_name(user_namespace, user_registry_name, name):
    prefix = get_full_robot_name_prefix(user_namespace, user_registry_name)
    return f'{prefix}+{name}'


def get_full_group_path(group_name, user_namespace):
    group_path = f'{OIDC_MANAGED_GROUPS_ROOT}/{OIDC_CLUSTER_GROUP_NAME}/{user_namespace}/{group_name}'
    return group_path
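
# Illustrative example (hypothetical values): for user_namespace='team-a',
# user_registry_name='harbor', name='ci-bot',
#   get_full_robot_name('team-a', 'harbor', 'ci-bot') == 'robot_team-a+harbor+ci-bot'
# and, assuming OIDC_MANAGED_GROUPS_ROOT='managed' and OIDC_CLUSTER_GROUP_NAME='prod',
#   get_full_group_path('devs', 'team-a') == 'managed/prod/team-a/devs'
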
def _get_parent_group(realm, parent_path, headers, logger):
    parent_group = _get_group_by_path_if_exists(realm, parent_path, headers, logger)
    if not parent_group:
        raise PlatformServiceTemporaryError(f'Parent group "{parent_path}" not found in realm "{realm}".')
    return parent_group


def _get_group_by_path_if_exists(realm, group_path, headers, logger):
    group_by_path_url = f'{_OIDC_GROUP_BY_PATH_URL_TEMPLATE.format(realm)}/{group_path}'
    res = requests.get(group_by_path_url, headers=headers)
    if res.status_code == 404:
        logger.warning(f'Group "{group_path}" does not exist in realm "{realm}"')
        return None
    elif not res.ok:
        raise PlatformServiceTemporaryError(f'Cannot get group "{group_path}" in realm "{realm}". '
                                            f'Response: {res.text}')
    group = res.json()
    return group


def _create_group(realm, group_path, group_name, parent_group_id, headers):
    group_by_path_url = f'{_OIDC_GROUP_BY_PATH_URL_TEMPLATE.format(realm)}/{group_path}'

    group = {'name': group_name}
    if parent_group_id:
        groups_children_url = f'{_OIDC_GROUPS_URL_TEMPLATE.format(realm)}/{parent_group_id}/children'
        res = requests.post(groups_children_url, json=group, headers=headers)  # 204 on success
    else:
        res = requests.post(_OIDC_GROUPS_URL_TEMPLATE.format(realm), json=group, headers=headers)
    if not res.ok:
        raise PlatformServiceTemporaryError(f'Cannot create group "{group_path}" in realm "{realm}". '
                                            f'Response: {res.text}')
    res = requests.get(group_by_path_url, headers=headers)
    if not res.ok:
        raise PlatformServiceTemporaryError(f'Cannot read back created group "{group_path}" in realm "{realm}". '
                                            f'Response: {res.text}')
    return res


def _parse_group_path(group_path):
    group_path = group_path.strip('/')
    parts = group_path.split('/')
    group_name = parts[-1]
    parent_path = '/'.join(parts[:-1]) if len(parts) > 1 else None
    return group_name, parent_path
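
# Illustrative examples:
#   _parse_group_path('managed/prod/team-a/devs') == ('devs', 'managed/prod/team-a')
#   _parse_group_path('/devs/') == ('devs', None)
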
def _set_group(realm, access_token, group_path, logger, modify=False):
    """
    Checks that a group exists at group_path.
    If the group is absent and modify=True, creates the group at group_path.

    Returns the id of the existing or newly created group.
    If modify=False and the check fails, logs an error.
    """
    headers = {"Authorization": f"Bearer {access_token}"}

    group_name, parent_path = _parse_group_path(group_path)
    parent_group_id = None

    if parent_path:
        parent_group = _get_parent_group(realm, parent_path, headers, logger)
        parent_group_id = parent_group['id']

    group_url = f'{_OIDC_GROUP_BY_PATH_URL_TEMPLATE.format(realm)}/{group_path}'
    res = requests.get(group_url, headers=headers)

    if res.status_code == 404:
        if not modify:
            logger.error(f'Group "{group_path}" does not exist in realm "{realm}", '
                         f'and modify is False. '
                         f'API response text: {res.text}')
            return

        res = _create_group(realm, group_path, group_name, parent_group_id, headers)
        if not res.ok:
            raise PlatformServiceTemporaryError(f'Cannot create group "{group_path}" '
                                                f'in realm "{realm}". '
                                                f'API response text: {res.text}')
    elif not res.ok:
        raise PlatformServiceTemporaryError(f'Cannot check if group "{group_path}" '
                                            f'exists in realm "{realm}". '
                                            f'API response text: {res.text}')
    group = res.json()
    return group['id']


def _move_or_create_group(realm, access_token, group_path, logger, group_id=None, modify=False):
    """
    If group_id is not given, checks that a group exists at group_path.
    If modify=True, creates the group at group_path when it is absent.

    If group_id is given, checks that the group with that id exists
    and is located at group_path.
    If the group with that id is absent and modify=True,
    creates the group at group_path.
    If the group with that id exists but is located at a different path,
    moves it to group_path (treating all segments except the last as the
    parent group path and the last segment as the name; the name is not changed).

    Returns the id of the existing or newly created group.
    If modify=False and a check fails, logs an error.
    """
    headers = {"Authorization": f"Bearer {access_token}"}

    group_name, parent_path = _parse_group_path(group_path)
    parent_group_id = None
    new_group_id = group_id

    if group_id:

        if parent_path:
            parent_group = _get_parent_group(realm, parent_path, headers, logger)
            parent_group_id = parent_group['id']

        group_url = f'{_OIDC_GROUPS_URL_TEMPLATE.format(realm)}/{group_id}'
        res = requests.get(group_url, headers=headers)

        if res.status_code == 404:
            # group_id is given but the group is absent: if modify=True,
            # create it and return the new group id;
            if not modify:
                logger.error(f'Group "{group_id}" does not exist in realm "{realm}", '
                             f'and modify is False. '
                             f'API response text: {res.text}')
                return
            res = _create_group(realm, group_path, group_name, parent_group_id, headers)
            group = res.json()
            new_group_id = group['id']
        elif res.status_code == 200:
            # group_id is given and the group exists;
            # check which path it is currently located at;
            group = res.json()
            current_path = group['path'].strip('/')
            # if the path is correct, return the current group id;
            # otherwise, if modify=True, move the group to another parent group;
            if group_path == current_path:
                return group_id
            if not modify:
                logger.error(f'Group "{group_id}" '
                             f'path "{group_path}" (realm "{realm}") does not match '
                             f'current path "{current_path}" '
                             f'and modify is False.')
                return
            move_group = {
                'id': group_id,
                'name': group['name']
            }
            if parent_path:
                groups_children_url = f'{_OIDC_GROUPS_URL_TEMPLATE.format(realm)}/{parent_group_id}/children'
                res = requests.post(groups_children_url, json=move_group, headers=headers)  # 204 on success
            else:
                # this effectively moves the group to the root;
                res = requests.post(_OIDC_GROUPS_URL_TEMPLATE.format(realm), json=move_group, headers=headers)
            if not res.ok:
                raise PlatformServiceTemporaryError(f'Cannot move group "{group_id}" '
                                                    f'to parent "{parent_path}" in realm "{realm}". '
                                                    f'API response text: {res.text}')
        else:
            raise PlatformServiceTemporaryError(f'Cannot get group "{group_id}" in realm "{realm}". '
                                                f'API response text: {res.text}')
    else:
        # group_id is not given: check that a group exists at group_path,
        # or create it if it is absent and modify=True;
        new_group_id = _set_group(realm, access_token, group_path, logger, modify)
    return new_group_id


def delete_group(group_ids, logger, modify):
    token = auth_keycloak_end_users()
    group_id = group_ids[OIDC_END_USERS_REALM]
    _delete_group_in_realm(OIDC_END_USERS_REALM, token, group_id, logger, modify)
    if OIDC_END_USERS_REALM != OIDC_ROBOTS_REALM:
        token = auth_keycloak_robots()
        group_id = group_ids[OIDC_ROBOTS_REALM]
        _delete_group_in_realm(OIDC_ROBOTS_REALM, token, group_id, logger, modify)


def delete_group_in_robots_realm_by_path(group_path, logger, modify):
    token = auth_keycloak_robots()

    _delete_group_in_realm_by_path(OIDC_ROBOTS_REALM, token, group_path, logger, modify)


def _delete_group_in_realm(realm, access_token, group_id, logger, modify=False):
    if not modify:
        logger.info(f'modify is False, will not delete group {group_id} in realm {realm}')
        return
    headers = {"Authorization": f"Bearer {access_token}"}
    group_url = f'{_OIDC_GROUPS_URL_TEMPLATE.format(realm)}/{group_id}'
    res = requests.delete(group_url, headers=headers)
    if res.status_code == 404:
        logger.warning(f'Group "{group_id}" does not exist in realm "{realm}"')
    elif not res.ok:
        raise PlatformServiceTemporaryError(f'Cannot delete group "{group_id}" in realm "{realm}". '
                                            f'API response text: {res.text}')


def _delete_group_in_realm_by_path(realm, access_token, group_path, logger, modify=False):
    if not modify:
        logger.info(f'modify is False, will not delete group "{group_path}" in realm {realm}')
        return
    headers = {"Authorization": f"Bearer {access_token}"}

    group_by_path_url = f'{_OIDC_GROUP_BY_PATH_URL_TEMPLATE.format(realm)}/{group_path}'
    res = requests.get(group_by_path_url, headers=headers)
    if res.status_code == 404:
        logger.warning(f'Group "{group_path}" does not exist in realm "{realm}"')
        return
    elif not res.ok:
        raise PlatformServiceTemporaryError(f'Cannot get group "{group_path}" in realm "{realm}". '
                                            f'Response: {res.text}')

    data = res.json()
    group_id = data['id']

    group_url = f'{_OIDC_GROUPS_URL_TEMPLATE.format(realm)}/{group_id}'
    res = requests.delete(group_url, headers=headers)
    if res.status_code == 404:
        logger.warning(f'Group "{group_id}" does not exist in realm "{realm}"')
    elif not res.ok:
        raise PlatformServiceTemporaryError(f'Cannot delete group "{group_id}" in realm "{realm}". '
                                            f'API response text: {res.text}')


def set_group_groups_chain(user_namespace, group_name, logger, modify):
    token = auth_keycloak_end_users()
    end_users_group_id = _set_group_groups_chain_in_realm(OIDC_END_USERS_REALM, token, user_namespace, group_name,
                                                          logger, modify)
    result = {OIDC_END_USERS_REALM: end_users_group_id}
    if OIDC_END_USERS_REALM != OIDC_ROBOTS_REALM:
        token = auth_keycloak_robots()
        robots_group_id = _set_group_groups_chain_in_realm(OIDC_ROBOTS_REALM, token, user_namespace, group_name,
                                                           logger, modify)
        result[OIDC_ROBOTS_REALM] = robots_group_id
    return result


def set_group_groups_chain_in_robots_realm(user_namespace, group_name, logger, modify):
    token = auth_keycloak_robots()
    robots_group_id = _set_group_groups_chain_in_realm(OIDC_ROBOTS_REALM, token, user_namespace, group_name,
                                                       logger, modify)
    return robots_group_id


def get_accounts_from_secret_attr(secret_data_, attr):
    secret_accounts = secret_data_[attr]
    secret_accounts_decoded = base64.b64decode(secret_accounts).decode('utf-8').strip()
    accounts = secret_accounts_decoded.split('\n')
    # materialize as a list so that callers can truth-test it and iterate more than once
    accounts = [s.strip() for s in accounts]
    return accounts


def _set_group_groups_chain_in_realm(realm, access_token, user_namespace, group_name, logger, modify):
    _set_group(realm, access_token, OIDC_MANAGED_GROUPS_ROOT, logger, modify)
    cluster_group_path = f'{OIDC_MANAGED_GROUPS_ROOT}/{OIDC_CLUSTER_GROUP_NAME}'
    _set_group(realm, access_token, cluster_group_path, logger, modify)
    user_group_path = f'{cluster_group_path}/{user_namespace}'
    _set_group(realm, access_token, user_group_path, logger, modify)
    group_path = f'{user_group_path}/{group_name}'
    group_id = _set_group(realm, access_token, group_path, logger, modify)
    return group_id
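
# Illustrative note: the chain above ensures every level of
#   <OIDC_MANAGED_GROUPS_ROOT>/<OIDC_CLUSTER_GROUP_NAME>/<user_namespace>/<group_name>
# exists (creating missing levels when modify=True) and returns the id of the leaf group.
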
def _clear_group_accounts(realm, group_id, headers, modify, logger):
    if not modify:
        logger.info(f'modify is False, will not clear group {group_id} accounts in realm {realm}')
        return
    group_members_url = f'{_OIDC_GROUPS_URL_TEMPLATE.format(realm)}/{group_id}/members'
    res = requests.get(group_members_url, headers=headers)
    if res.status_code == 404:
        logger.warning(f'Group "{group_id}" does not exist in realm "{realm}". '
                       f'Cannot remove users from group')
        return
    elif not res.ok:
        raise PlatformServiceTemporaryError(f'Cannot remove users from group "{group_id}" in realm "{realm}". '
                                            f'API response text: {res.text}')

    data = res.json()
    for user in data:
        user_id = user['id']
        user_group_url = f'{_OIDC_USERS_URL_TEMPLATE.format(realm)}/{user_id}/groups/{group_id}'
        res = requests.delete(user_group_url, headers=headers)
        if res.status_code == 404:
            logger.warning(f'Group "{group_id}" or user "{user_id}" does not exist in realm "{realm}". '
                           f'Cannot remove user from group')
        elif not res.ok:
            raise PlatformServiceTemporaryError(f'Cannot remove user "{user_id}" from group "{group_id}" '
                                                f'in realm "{realm}". '
                                                f'API response text: {res.text}')


def _set_user_to_group(realm, username, user_id, group_id, headers, modify, logger):
    if modify:
        user_group_url = f'{_OIDC_USERS_URL_TEMPLATE.format(realm)}/{user_id}/groups/{group_id}'
        res = requests.put(user_group_url, headers=headers)
        if not res.ok:
            raise PlatformServiceTemporaryError(f'Cannot set user "{user_id}" to group "{group_id}" '
                                                f'in realm "{realm}". '
                                                f'API response text: {res.text}')
    else:
        user_groups_url = f'{_OIDC_USERS_URL_TEMPLATE.format(realm)}/{user_id}/groups'
        res = requests.get(user_groups_url, headers=headers)
        if not res.ok:
            raise PlatformServiceTemporaryError(f'Cannot get user "{user_id}" in realm "{realm}". '
                                                f'API response text: {res.text}')
        res_data = res.json()
        if not any(map(lambda g: g['id'] == group_id, res_data)):
            logger.error(f'User "{username}" is not a member of group "{group_id}" in realm "{realm}" '
                         f'and modify is False.')


def _get_user_by_param_if_exists(realm, user_param, param_name, headers, logger):
    users_url = _OIDC_USERS_URL_TEMPLATE.format(realm)
    params = {param_name: user_param, 'exact': 'true'}
    res = requests.get(users_url, params=params, headers=headers)
    if not res.ok:
        raise PlatformServiceTemporaryError(f'Cannot get users by {param_name} "{user_param}" '
                                            f'in realm "{realm}". '
                                            f'API response text: {res.text}')
    res_data = res.json()
    if not res_data:
        return None
    user = res_data[0]
    if len(res_data) > 1:
        user_id = user['id']
        logger.warning(f'There is more than one user with {param_name} "{user_param}" in realm "{realm}", '
                       f'the user with id {user_id} is selected')
    return user


def _get_users_by_param(realm, user_param, param_name, headers, logger):
    users_url = _OIDC_USERS_URL_TEMPLATE.format(realm)
    params = {param_name: user_param}
    res = requests.get(users_url, params=params, headers=headers)
    if not res.ok:
        raise PlatformServiceTemporaryError(f'Cannot get users by {param_name} "{user_param}" '
                                            f'in realm "{realm}". '
                                            f'API response text: {res.text}')
    users = res.json()
    return users


def _set_users_to_group_by_params(realm, group_id, users_params, param_name, headers, modify, logger):

    for user_param in users_params:
        user = _get_user_by_param_if_exists(realm, user_param, param_name, headers, logger)
        if not user:
            logger.info(f'User with {param_name} "{user_param}" not found in realm "{realm}"')
            continue

        user_id = user['id']
        _set_user_to_group(realm, user_param, user_id, group_id, headers, modify, logger)


def set_group_accounts(group_ids, usernames, emails, robots_full_names, modify, logger):
    end_users_group_id = group_ids[OIDC_END_USERS_REALM]
    robots_group_id = group_ids[OIDC_ROBOTS_REALM]
    token = auth_keycloak_end_users()
    headers = {"Authorization": f"Bearer {token}"}
    robots_headers = headers

    _clear_group_accounts(OIDC_END_USERS_REALM, end_users_group_id, headers, modify, logger)
    if OIDC_END_USERS_REALM != OIDC_ROBOTS_REALM:
        robots_token = auth_keycloak_robots()
        robots_headers = {"Authorization": f"Bearer {robots_token}"}
        _clear_group_accounts(OIDC_ROBOTS_REALM, robots_group_id, robots_headers, modify, logger)
    if usernames:
        _set_users_to_group_by_params(OIDC_END_USERS_REALM, end_users_group_id, usernames, 'username',
                                      headers, modify, logger)
    if emails:
        _set_users_to_group_by_params(OIDC_END_USERS_REALM, end_users_group_id, emails, 'email',
                                      headers, modify, logger)
    if robots_full_names:
        _set_users_to_group_by_params(OIDC_ROBOTS_REALM, robots_group_id, robots_full_names, 'username',
                                      robots_headers, modify, logger)


def set_group_accounts_in_robots_realm(group_id, robots_full_names, modify, logger):
    token = auth_keycloak_robots()
    headers = {"Authorization": f"Bearer {token}"}
    robots_headers = headers

    _clear_group_accounts(OIDC_ROBOTS_REALM, group_id, headers, modify, logger)
    if robots_full_names:
        _set_users_to_group_by_params(OIDC_ROBOTS_REALM, group_id, robots_full_names, 'username', robots_headers,
                                      modify, logger)


def set_group_accounts_in_robots_realm_by_path(group_path, robots_full_names, modify, logger):
|
||||
token = auth_keycloak_robots()
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
|
||||
group = _get_group_by_path_if_exists(OIDC_ROBOTS_REALM, group_path, headers, logger)
|
||||
if not group:
|
||||
return
|
||||
group_id = group['id']
|
||||
|
||||
_clear_group_accounts(OIDC_ROBOTS_REALM, group_id, headers, modify, logger)
|
||||
if robots_full_names:
|
||||
_set_users_to_group_by_params(OIDC_ROBOTS_REALM, group_id, robots_full_names, 'username', headers,
|
||||
modify, logger)
|
||||
|
||||
|
||||
def _set_user_email(realm, user, email, headers):
    user_id = user['id']
    user_url = f'{_OIDC_USERS_URL_TEMPLATE.format(realm)}/{user_id}'
    user['email'] = email
    res = requests.put(user_url, json=user, headers=headers)
    if not res.ok:
        raise PlatformServiceTemporaryError(f'Cannot set email for user "{user["username"]}" in realm "{realm}", '
                                            f'user id = "{user_id}". '
                                            f'API response text: {res.text}')


def _create_user(realm, username, email, headers):
    users_url = _OIDC_USERS_URL_TEMPLATE.format(realm)
    user = {'username': username, 'email': email, 'enabled': True, 'emailVerified': True,
            'firstName': username, 'lastName': username}
    res = requests.post(users_url, json=user, headers=headers)
    if not res.ok:
        raise PlatformServiceTemporaryError(f'Cannot create user "{username} ({email})" in realm "{realm}". '
                                            f'API response text: {res.text}')

    loc = res.headers.get('Location')
    if not loc or '/' not in loc:
        raise PlatformServiceTemporaryError(f'User "{username} ({email})" in realm "{realm}" '
                                            f'is created, but the Location header is absent or malformed. '
                                            f'API response text: {res.text}')
    user_id = loc[loc.rfind('/') + 1:]
    return user_id


_passwords_alphabet = string.ascii_letters + string.digits


def _create_password():
    return ''.join(secrets.choice(_passwords_alphabet) for _ in range(20))


def _set_user_password(realm, user_id, headers):
    user_url = f'{_OIDC_USERS_URL_TEMPLATE.format(realm)}/{user_id}/reset-password'
    raw_password = _create_password()
    credentials = {
        'value': raw_password,
        'temporary': False
    }
    res = requests.put(user_url, json=credentials, headers=headers)
    if not res.ok:
        raise PlatformServiceTemporaryError(f'Cannot set password for user in realm "{realm}", '
                                            f'user id = "{user_id}". '
                                            f'API response text: {res.text}')
    return raw_password


def _set_user(realm, username, email, headers, modify, logger):
    user_by_name = _get_user_by_param_if_exists(realm, username, 'username', headers, logger)
    user_by_email = _get_user_by_param_if_exists(realm, email, 'email', headers, logger)
    if not user_by_name and user_by_email:
        raise PlatformServiceTemporaryError(f'User with username "{username}" does not exist, '
                                            f'but user with email "{email}" exists in realm "{realm}". '
                                            f'User id = {user_by_email["id"]}.')
    user = user_by_name
    user_id = None
    if user:
        user_id = user['id']
        user_email = user['email']
        if user_email != email:
            if not modify:
                logger.error(f'User with username "{username}" exists, but email differs from expected, '
                             f'and modify is False. User id = {user_id}, user email = {user_email}.')
            else:
                _set_user_email(realm, user, email, headers)
    else:
        if not modify:
            logger.error(f'User with username "{username}" does not exist in realm "{realm}", '
                         f'and modify is False.')
        else:
            user_id = _create_user(realm, username, email, headers)
    return user_id


def _set_users(realm, username_email_pairs, headers, modify, logger):
    for username, email in username_email_pairs:
        _set_user(realm, username, email, headers, modify, logger)


def _set_users_with_password(realm, username_email_pairs, headers, modify, logger):
    username_password_pairs = []
    for username, email in username_email_pairs:
        user_id = _set_user(realm, username, email, headers, modify, logger)
        if user_id:
            password = _set_user_password(realm, user_id, headers)
            username_password_pairs.append((username, password))
    return username_password_pairs


def _delete_accounts(realm, username_substr, headers, modify, logger):
    if not modify:
        logger.info(f'modify is False, will not delete accounts '
                    f'with usernames containing "{username_substr}" in realm {realm}')
        return

    users = _get_users_by_param(realm, username_substr, 'username', headers, logger)
    for user in users:
        user_id = user['id']
        user_url = f'{_OIDC_USERS_URL_TEMPLATE.format(realm)}/{user_id}'
        res = requests.delete(user_url, headers=headers)
        if res.status_code == 404:
            logger.warning(f'User "{user_id}" does not exist in realm "{realm}". '
                           f'Cannot delete user.')
        elif not res.ok:
            raise PlatformServiceTemporaryError(f'Cannot delete user "{user_id}" '
                                                f'from realm "{realm}". '
                                                f'API response text: {res.text}')


def set_robot_accounts(robots_full_name_prefix, robots_full_name_email_pairs, modify, logger):
    token = auth_keycloak_robots()
    headers = {"Authorization": f"Bearer {token}"}

    _delete_accounts(OIDC_ROBOTS_REALM, robots_full_name_prefix, headers, modify, logger)
    robots_full_name_password_pairs = []
    if robots_full_name_email_pairs:
        robots_full_name_password_pairs = \
            _set_users_with_password(OIDC_ROBOTS_REALM, robots_full_name_email_pairs, headers, modify, logger)
        # if passwords do not need to be set, use _set_users instead:
        # _set_users(OIDC_ROBOTS_REALM, robots_full_name_email_pairs, headers, modify, logger)
    return robots_full_name_password_pairs


# if, in the future, tokens can be issued by the robots-admin client with impersonation
# by client and subject, the password will no longer be required;
def create_robot_tokens(robot_name, robot_password):
    token_url = f'{OIDC_KEYCLOAK_HOST}/realms/{OIDC_ROBOTS_REALM}/protocol/openid-connect/token'
    data = {'client_id': OIDC_ROBOTS_CLIENT_ID,
            'grant_type': 'password',
            'username': robot_name,
            'password': robot_password}

    res = requests.post(token_url, data=data)
    if res.status_code != 200:
        raise PlatformServiceTemporaryError(f'Cannot create robot tokens. API response text: {res.text}')
    res_data = res.json()
    return res_data
135
controller/src/cmplink/link.py
Normal file

@@ -0,0 +1,135 @@
# ============================================================
# System: Unified Library, AI Centre, HSE University
# Module: ComponentLink
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
from kubernetes.client import CustomObjectsApi, ApiClient, ApiException

from basic_resources.secrets import read_secret_if_exists, patch_secret_if_exists

TARGET_KIND_DATASET_COMPONENT = 'DatasetComponent'
TARGET_KIND_API_COMPONENT = 'APIComponent'
# TARGET_KIND_API_COMPONENT is intentionally not included in the mapping
TARGET_KIND_TO_CODE = {TARGET_KIND_DATASET_COMPONENT: 'datasetcmp'}


def get_secret_name_from_link(link: dict):
    link_spec = link['spec']
    if 'secretRef' not in link_spec:
        return None
    secret_ref = link_spec['secretRef']
    secret_name = secret_ref['name']
    return secret_name


def get_links_given_predicate(api_client: ApiClient, namespace, predicate):
    # starting with Kubernetes 1.31, CRD selectable fields could be used
    # instead of iterating over the whole list:
    # https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/#crd-selectable-fields
    # co_api = CustomObjectsApi(api_client)
    # field_selector = f'spec.target.credentials={secret_name}'
    # links_list, _, _ = co_api.list_namespaced_custom_object_with_http_info(
    #     "unified-platform.cs.hse.ru", "v1", secret_namespace,
    #     "componentlinks", field_selector=field_selector)
    # return links_list['items']

    co_api = CustomObjectsApi(api_client)
    links_list, _, _ = co_api.list_namespaced_custom_object_with_http_info(
        "unified-platform.cs.hse.ru", "v1", namespace,
        "componentlinks")
    if not links_list['items']:
        return []

    filtered_list = []
    for link in links_list['items']:
        if predicate(link):
            filtered_list.append(link)

    return filtered_list


def get_links_if_exist(api_client: ApiClient, names, namespace, logger):
    co_api = CustomObjectsApi(api_client)
    result = []
    for name in names:
        try:
            link_res = co_api.get_namespaced_custom_object(
                "unified-platform.cs.hse.ru", "v1", namespace,
                "componentlinks", name)
            result.append(link_res)
        except ApiException as exc:
            if exc.status == 404:
                logger.warning(f'ComponentLink "{namespace}.{name}" does not exist')
            else:
                raise exc
    return result


def get_target_resource_if_exists(api_client: ApiClient, target, target_namespace, target_kind, logger):
    resource_kind = target_kind if target_kind else TARGET_KIND_API_COMPONENT
    resource_kind_plural = resource_kind.lower() + 's'
    co_api = CustomObjectsApi(api_client)
    try:
        api_obj, _, _ = co_api.get_namespaced_custom_object_with_http_info(
            "unified-platform.cs.hse.ru", "v1", target_namespace,
            resource_kind_plural, target)
        return api_obj
    except ApiException as exc:
        if exc.status == 404:
            logger.info(f'Application {target_namespace} {resource_kind} {target} does not exist.')
            return None
        raise exc


def does_target_specified_in_link(target, target_namespace, target_kind, link_spec):
    targets = link_spec.get('targets')
    app_name = link_spec['platformAppName']

    if not targets:
        return False
    passed_target_kind = target_kind or TARGET_KIND_API_COMPONENT
    for target_spec in targets:
        link_target = target_spec['name']
        link_target_app = target_spec.get('platformAppName', app_name)
        link_target_kind = target_spec.get('kind', TARGET_KIND_API_COMPONENT)
        if link_target == target and link_target_app == target_namespace and link_target_kind == passed_target_kind:
            return True

    return False


def unset_secret_anno_and_label(api_client, anno_prefix, label, ref_name, secret_name, namespace, logger):
    secret = read_secret_if_exists(api_client, secret_name, namespace)
    if not secret:
        return

    patch = None

    if secret.metadata.annotations is not None:
        ref_anno = f'{anno_prefix}/{ref_name}'
        if ref_anno in secret.metadata.annotations:
            patch = {'metadata': {'annotations': {ref_anno: None}}}
            secret.metadata.annotations.pop(ref_anno)

    if secret.metadata.labels is not None:
        another_refs = False
        if secret.metadata.annotations is not None:
            for anno in secret.metadata.annotations:
                if anno.startswith(anno_prefix):
                    another_refs = True
                    break
        if not another_refs:
            if not patch:
                patch = {'metadata': {}}
            patch['metadata']['labels'] = {label: None}

    if patch:
        patch_secret_if_exists(api_client, secret_name, namespace, patch)


def get_secret_name_if_set(spec):
    secret_name = None
    if 'secretRef' in spec:
        secret_ref = spec['secretRef']
        secret_name = secret_ref['name']
    return secret_name
51
controller/src/config.py
Normal file

@@ -0,0 +1,51 @@
# ============================================================
# System: Unified Library, AI Centre, HSE University
# Module: Utilities
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
import os

from dotenv import load_dotenv

PIPELINES_SERVICE_PORT = 80
PIPELINES_EXTERNAL_SERVICE_NAME = 'pipelines-svc.unip-system-controller'

load_dotenv()

UNIP_DOMAIN = os.getenv('UNIP_DOMAIN')

ARGO_CD_API = os.getenv('ARGO_CD_API')
ARGO_CD_USER = os.getenv('ARGO_CD_USER')
ARGO_CD_PASSWORD = os.getenv('ARGO_CD_PASSWORD')
UNIP_CONTROLLER_DEV = os.getenv('UNIP_CONTROLLER_DEV', 'False')

# only the literal 'True' enables Argo CD; any other value disables it
ARGO_CD_ENABLED = os.getenv('ARGO_CD_ENABLED', 'True') == 'True'

OIDC_END_USERS_ISSUER_URL = os.getenv('OIDC_END_USERS_ISSUER_URL')
OIDC_END_USERS_REALM = os.getenv('OIDC_END_USERS_REALM')
OIDC_END_USERS_CLIENT_ID = os.getenv('OIDC_END_USERS_CLIENT_ID')
OIDC_END_USERS_CLIENT_SECRET = os.getenv('OIDC_END_USERS_CLIENT_SECRET')
OIDC_END_USERS_ADMIN_CLIENT_ID = os.getenv('OIDC_END_USERS_ADMIN_CLIENT_ID')
OIDC_END_USERS_ADMIN_CLIENT_SECRET = os.getenv('OIDC_END_USERS_ADMIN_CLIENT_SECRET')
OIDC_END_USERS_COOKIE_SECRET = os.getenv('OIDC_END_USERS_COOKIE_SECRET')
OIDC_END_USERS_AUD = os.getenv('OIDC_END_USERS_AUD')

OIDC_ROBOTS_ISSUER_URL = os.getenv('OIDC_ROBOTS_ISSUER_URL')
OIDC_ROBOTS_REALM = os.getenv('OIDC_ROBOTS_REALM')
OIDC_ROBOTS_CLIENT_ID = os.getenv('OIDC_ROBOTS_CLIENT_ID')
OIDC_ROBOTS_CLIENT_SECRET = os.getenv('OIDC_ROBOTS_CLIENT_SECRET')
OIDC_ROBOTS_ADMIN_CLIENT_ID = os.getenv('OIDC_ROBOTS_ADMIN_CLIENT_ID')
OIDC_ROBOTS_ADMIN_CLIENT_SECRET = os.getenv('OIDC_ROBOTS_ADMIN_CLIENT_SECRET')
OIDC_ROBOTS_AUD = os.getenv('OIDC_ROBOTS_AUD')

OIDC_KEYCLOAK_HOST = os.getenv('OIDC_KEYCLOAK_HOST')
OIDC_MANAGED_GROUPS_ROOT = os.getenv('OIDC_MANAGED_GROUPS_ROOT', 'unip')
OIDC_CLUSTER_GROUP_NAME = os.getenv('OIDC_CLUSTER_GROUP_NAME', 'default')
OIDC_MODIFY = os.getenv('OIDC_MODIFY', 'False') in ('True', 'true')
163
controller/src/datasetcmp/README.md
Normal file

@@ -0,0 +1,163 @@
---
**System**: Unified Library, AI Centre, HSE University

**Module**: DatasetComponent

**Authors**: Klimin N.A., Polezhaev V.A., Khritankov A.S.

**Created**: 2024

---

# DatasetComponent

## Controller configuration

- `FILES_API_BASE_URL` - base URL of the Files API
- `INGRESS_RULE_HOST` - host for the ingress rule
- `INGRESS_BACKEND_SERVICE_NAME` - backend service for the rule
- `INGRESS_BACKEND_SERVICE_PORT` - backend service port for the rule
- `UNIP_DATASET_CMP_CHECK_CONTROLLER_PERMISSIONS` - enables or disables (`True` / `False`) the controller permissions check
- `UNIP_DATASET_CMP_VALIDATE` - enables or disables (`True` / `False`) dataset validation

A minimal environment sketch for these variables is shown below.
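The values here are illustrative placeholders for a local run, not defaults shipped with the controller:

```shell
# hypothetical values; adjust to your cluster and Files API deployment
export FILES_API_BASE_URL="https://files.example.org"
export INGRESS_RULE_HOST="platform.example.org"
export INGRESS_BACKEND_SERVICE_NAME="files-api-svc"
export INGRESS_BACKEND_SERVICE_PORT="8080"
export UNIP_DATASET_CMP_CHECK_CONTROLLER_PERMISSIONS="True"
export UNIP_DATASET_CMP_VALIDATE="True"
```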
## How it works

The controller's handlers include:

- `set_dataset_component_rules` - handles creation and modification of the module
- `unset_dataset_component_rules` - handles deletion of the module

### `set_dataset_component_rules`

1. The data in the module manifest is validated in `validate_metadata` (the files are fetched from the API, hashed and validated); the hashes can be reproduced locally, see the sketch after this list
2. The metadata file is validated against the [schema.org JSON schemas](https://raw.githubusercontent.com/charlestati/schema-org-json-schemas/89ed608d2bd03548c10a086c58af27090920bcc5/schemas)
3. The file-list file is validated (the locations are fetched from the API)
4. The ingress configuration is fetched
5. Rules are created or deleted in `patch_rules`
    - If a version was added, rules are added for it
    - If a version was deleted, the rules associated with it are deleted (deleted versions are found by the box name)
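For example, the `sha256` values recorded in the manifest (see the example manifest below) are plain SHA-256 digests of the file bytes, so they can be reproduced locally for the test data used later in this README:

```shell
# a sketch: recompute the digests the controller compares against
sha256sum tests/datasetcmp/v1/metadata.json tests/datasetcmp/v1/files.json
```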
### `unset_dataset_component_rules`

1. If the ingress contains rules only for this dataset, the whole ingress is deleted
2. If the ingress also contains rules from other datasets, only the rules of the deleted dataset are removed
## Example DatasetComponent manifest

```yaml
apiVersion: unified-platform.cs.hse.ru/v1
kind: DatasetComponent
metadata:
  name: samokat
  namespace: example-namespace
spec:
  box: default-s3-box
  filesApiSecret: basic-auth-credentials-not-hashed
  restfulApi:
    auth:
      basic:
        credentials: basic-auth-datacmp-credentials
  versions:
    - name: v1
      contents:
        files:
          sha256: 4575809932e33e0e88108d8296daec4e19223e85a87c101701dfd5efdb324931
        metadata:
          sha256: 88439bb7372a7d5913b3c0fcbf6a78f997eb05cb61fd4aa6a07e49b4799dc8d4
        readme:
          path: samokat_readme.md
          sha256: a6792b14c9b7f112055c64e296d940476cab7d24ed4fadd732c356e750de2ec0
```
## CLI

To simplify creating the module manifest and uploading data to the Files API, the `datasetcmp/cli.py` script was created.

## Deploying and using a data module

Deploying a data module involves the following steps:

1. Create a metadata file, a file listing the module files, and a human-readable description file (for example, in Markdown).
2. Upload the module files through the Files API.
3. Create a deployment manifest.
4. Deploy it to the Kubernetes cluster.

The `datasetcmp/cli.py` script can be used for some of these steps.

```shell
python datasetcmp/cli.py -h
```
### Creating the metadata, file-list and description files

The metadata file is in JSON-LD format following the [schema.org/Dataset](https://schema.org/Dataset) schema; validation is performed against the [JSON schemas](https://raw.githubusercontent.com/charlestati/schema-org-json-schemas/89ed608d2bd03548c10a086c58af27090920bcc5/schemas). The file can be created with any text editor or with a JSON-LD-specific tool.

Note that the schema.org validator is not recommended, as it permits extra fields and invalid values in them. The script mentioned above can be used for validation instead.

```shell
python datasetcmp/cli.py --verbose metadata -m tests/datasetcmp/v1/metadata.json
```

The file-list file can also be created with the provided script. Its format is currently unrestricted, but using the script is recommended to get an exact description of all files.

```shell
python datasetcmp/cli.py files tests/datasetcmp/v1/data ./files.json
```

The human-readable description file may be in any format, but Markdown is recommended.
### Uploading the module files through the Files API

The provided script can upload files through the Files API. Any other method can be used as well, including calling the API directly.

The example below uploads the metadata, file-list and description files from the `tests/datasetcmp/v1` directory to the `test-dataset-controller` application, into the `default-s3-box` box at location `v1`. Here the command is used to create version `v1`.

```shell
python datasetcmp/cli.py upload -a "test-dataset-controller" -b "default-s3-box" -l "v1" -u user -p password "tests/datasetcmp/v1"
```

The example below uploads the dataset files from the `tests/datasetcmp/data` directory to the `test-dataset-controller` application, into the `default-s3-box` box at location `data`. Nesting is preserved. If there are many files and an error occurs, the upload can be resumed from the file where it failed (the `--fr` and `--to` options).

```shell
python datasetcmp/cli.py upload -a "test-dataset-controller" -b "default-s3-box" -l "data" -u user -p password "tests/datasetcmp/data"
```
### Creating the deployment manifest

The script can be used to create the manifest. As with the other steps, the file can also be written by hand, using the example above and the CRD as a basis.

```shell
python datasetcmp/cli.py manifest -h
```

The example below creates a manifest for version `v1`, passing in the files from the examples above as well as the developer secret that lets the controller work with the Files API.

```shell
python datasetcmp/cli.py manifest -a "test-dataset-controller" -b "default-s3-box" -v "v1" -r "tests/datasetcmp/v1/weapons_dataset.md" -f "tests/datasetcmp/v1/files.json" -m "tests/datasetcmp/v1/metadata.json" -n weapons_dataset -N example-namespace "tests/datasetcmp/dataset.yaml" --secrets-developer basic-auth-credentials-not-hashed
```

Note that the developer secret must not be hashed, because the controller has to send the credentials to the API for authorization.

Another important point is the user secret (an [htpasswd](https://kubernetes.github.io/ingress-nginx/examples/auth/basic/) secret): if the `--secrets-user` option is not passed to the script, or no authentication method is specified in the manifest, the controller takes the authorization secrets from `<name>-joint-cred`, where `name` is the name of the data module being created (the `metadata.name` field of the module manifest). A sketch of creating such a secret is shown below.
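A minimal sketch following the ingress-nginx basic-auth guide linked above; the user name is a placeholder, the secret name comes from the example manifest:

```shell
# create an htpasswd file with one user, then store it as a secret under the key "auth"
htpasswd -c auth user1
kubectl -n example-namespace create secret generic basic-auth-datacmp-credentials --from-file=auth
```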
### Deploying to the Kubernetes cluster

Before deploying, check that the generated file contains all the required parameters; then deploy it using `kubectl` or any other interface. A quick way to inspect the result is sketched after the command below.

```shell
kubectl apply -f tests/datasetcmp/dataset.yaml
```
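Assuming only standard `kubectl` commands and the CRD plural used by the controller, the deployed resource and the generated ingress can be inspected like this:

```shell
kubectl -n example-namespace get datasetcomponents
kubectl -n example-namespace describe ingress
```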
### Recommended file structure

Files shared between versions can be placed in a common `data` directory. Files may also be placed in the per-version metadata directories, but it is recommended to keep all dataset files inside `data`.

```
<app>/<box>/
    v1/       # version 1 (metadata files, dataset files for version 1)
    v2/       # version 2 (metadata files, dataset files for version 2)
    data/     # files shared by all versions (dataset files)
        test/
        train/
        ...
```
0
controller/src/datasetcmp/__init__.py
Normal file

350
controller/src/datasetcmp/cli.py
Normal file

@@ -0,0 +1,350 @@
# ============================================================
# System: Unified Library, AI Centre, HSE University
# Module: DatasetComponent
# Authors: Klimin N.A.
# Created: 2025
# ============================================================
import argparse
from functools import cache
import json
import pathlib
import re
import hashlib
import yaml
import sys
import mimetypes

# pip install jsonschema requests pyyaml
import requests
from requests.auth import HTTPBasicAuth
from referencing import Registry, Resource
import jsonschema
import jsonschema.exceptions


mimetypes.add_type("text/markdown", ".md")

BASE_URL = "https://platform-dev-cs-hse.objectoriented.ru"
APP_BOX_PATTERN = re.compile(r"^[a-zA-Z0-9_-]{1,50}$")


def parse_arguments():
    parser = argparse.ArgumentParser(description="Prepare data for DataSet component")
    parser.add_argument("--verbose", help="Verbose output", action="store_true")
    subparsers = parser.add_subparsers()

    metadata_parser = subparsers.add_parser("metadata", help="Process metadata")
    metadata_parser.add_argument(
        "-m",
        "--metadata",
        help="Path to dataset metadata",
        required=True,
    )
    metadata_parser.set_defaults(subparser="metadata")

    files_parser = subparsers.add_parser("files", help="Process files")
    files_parser.add_argument("files", help="Directory containing files")
    files_parser.add_argument("output", help="Name of output file")
    files_parser.set_defaults(subparser="files")

    manifest_parser = subparsers.add_parser("manifest", help="Generate manifest")
    manifest_parser.add_argument("-a", "--app", help="Application name", required=True)
    manifest_parser.add_argument("-b", "--box", help="Box name", required=True)
    manifest_parser.add_argument("-v", "--version", help="Version name", required=True)
    manifest_parser.add_argument(
        "-r",
        "--readme",
        help="Path to dataset human readable info",
        required=True,
    )
    manifest_parser.add_argument(
        "-f",
        "--files",
        help="Path to dataset contents file",
        required=True,
    )
    manifest_parser.add_argument(
        "-m",
        "--metadata",
        help="Path to dataset metadata",
        required=True,
    )
    manifest_parser.add_argument(
        "-n",
        "--name",
        help="Resource name",
        required=True,
    )
    manifest_parser.add_argument(
        "-N",
        "--namespace",
        help="Resource namespace",
        required=True,
    )
    manifest_parser.add_argument(
        "--secrets-developer",
        help="Developer auth secret in namespace",
        required=True,
    )
    manifest_parser.add_argument(
        "--secrets-user",
        help="User auth secret (hashed) in namespace",
    )
    manifest_parser.add_argument("output", help="Name of output file")
    manifest_parser.set_defaults(subparser="manifest")

    upload_parser = subparsers.add_parser("upload", help="Upload files to server")
    upload_parser.add_argument("-a", "--app", help="Application name", required=True)
    upload_parser.add_argument("-b", "--box", help="Box name", required=True)
    upload_parser.add_argument("-l", "--location", help="Location (in API)", required=True)
    upload_parser.add_argument("files", help="Directory containing files")
    upload_parser.add_argument("-u", "--username", help="Username for authentication")
    upload_parser.add_argument("-p", "--password", help="Password for authentication")
    upload_parser.add_argument("--base-url", help="Files API URL", default=BASE_URL)
    upload_parser.add_argument("--fr", help="First file index", type=int, default=0)
    upload_parser.add_argument("--to", help="Last file index (including)", type=int, default=-1)
    upload_parser.set_defaults(subparser="upload")

    args = parser.parse_args()

    if args.subparser in ["manifest", "upload"]:
        if APP_BOX_PATTERN.fullmatch(args.app) is None:
            parser.error(
                f"Application name is invalid, should match {APP_BOX_PATTERN.pattern}"
            )
        if APP_BOX_PATTERN.fullmatch(args.box) is None:
            parser.error(
                f"Box name is invalid, should match {APP_BOX_PATTERN.pattern}"
            )

    if args.subparser == "manifest":
        if APP_BOX_PATTERN.fullmatch(args.name) is None:
            parser.error(
                f"Resource name is invalid, should match {APP_BOX_PATTERN.pattern}"
            )
        if APP_BOX_PATTERN.fullmatch(args.namespace) is None:
            parser.error(
                f"Resource namespace is invalid, should match {APP_BOX_PATTERN.pattern}"
            )

        args.output = pathlib.Path(args.output)
        args.readme = pathlib.Path(args.readme)
        args.files = pathlib.Path(args.files)
        args.metadata = pathlib.Path(args.metadata)
        if args.output.is_dir():
            parser.error("Output path is a directory")
        if not args.readme.is_file():
            parser.error("Dataset human readable info is not a file or doesn't exist")
        if not args.files.is_file():
            parser.error("Dataset contents file is not a file or doesn't exist")
        if not args.metadata.is_file():
            parser.error("Dataset metadata is not a file or doesn't exist")

    if args.subparser == "upload":
        args.files = pathlib.Path(args.files)
        if not args.files.is_dir():
            parser.error("Dataset files path is not a directory or doesn't exist")
        if args.username is not None and args.password is None:
            parser.error("Both username and password should be set, or neither")
        if args.username is None and args.password is not None:
            parser.error("Both username and password should be set, or neither")

    if args.subparser == "metadata":
        args.metadata = pathlib.Path(args.metadata)
        if not args.metadata.is_file():
            parser.error("Dataset metadata is not a file or doesn't exist")

    if args.subparser == "files":
        args.output = pathlib.Path(args.output)
        args.files = pathlib.Path(args.files)
        if args.output.is_dir():
            parser.error("Output path is a directory")
        if not args.files.is_dir():
            parser.error("Dataset files path is not a directory or doesn't exist")

    return args


def get_checksum(data):
    return hashlib.sha256(data).hexdigest()


def generate_manifest(args):
    new_version = {
        "name": args.version,
        "contents": {
            "readme": {
                "path": args.readme.name,
                "sha256": get_checksum(args.readme.read_bytes()),
            },
            "metadata": {
                "path": args.metadata.name,
                "sha256": get_checksum(args.metadata.read_bytes()),
            },
            "files": {
                "path": args.files.name,
                "sha256": get_checksum(args.files.read_bytes()),
            },
        },
    }
    data = {
        "apiVersion": "unified-platform.cs.hse.ru/v1",
        "kind": "DatasetComponent",
        "metadata": {
            "name": args.name,
            "namespace": args.namespace,
        },
        "spec": {
            "box": args.box,
            "versions": [
                new_version
            ]
        },
    }
    if args.output.exists():
        try:
            existing_data = yaml.safe_load(args.output.read_text())
            # replace the version with the same name, or append a new one
            for i, version in enumerate(existing_data["spec"]["versions"]):
                if version["name"] == args.version:
                    existing_data["spec"]["versions"][i] = new_version
                    break
            else:
                existing_data["spec"]["versions"].append(new_version)
            data = existing_data
        except Exception as e:
            print(f"Invalid output file ({e})! Rewriting file!")
    if args.secrets_developer is not None:
        data["spec"]["filesApiSecret"] = args.secrets_developer
    if args.secrets_user is not None:
        data["spec"]["restfulApi"] = {"auth": {"basic": {"credentials": args.secrets_user}}}
    args.output.write_text(yaml.safe_dump(data))
    if args.verbose:
        print("Done!")


def upload_files(args):
    auth = None
    if args.username is not None:
        auth = HTTPBasicAuth(args.username, args.password)
    files = sorted(list(args.files.glob("**/*")))
    if args.to == -1:
        args.to = len(files)
    for i, file in enumerate(files[args.fr:args.to]):
        rel_file = file.relative_to(args.files)
        if file.is_file():
            # the first PUT registers the location and returns a presigned upload URL
            path = f"{args.base_url}/{args.app}/files/{args.box}/{args.location}/{str(rel_file)}"
            response = requests.put(path, auth=auth)
            if response.status_code != 200:
                print(f"Error while creating location {str(rel_file)}: {response.text}")
                return 1
            urls = response.json()
            headers = {}
            mime_type = mimetypes.guess_type(path)[0]
            if mime_type is not None:
                headers["content-type"] = mime_type
            # the second PUT uploads the file contents to the presigned URL
            response = requests.put(
                urls["presigned_put_url"],
                data=file.read_bytes(),
                auth=auth,
                headers=headers,
            )
            if response.status_code != 200:
                print(f"Error while uploading {str(rel_file)}: {response.text}")
                return 1
            elif args.verbose:
                print(f"{i+1+args.fr:>8} / {args.to}. Upload for file '{file}' to '{path}' completed")
    if args.verbose:
        print("Done!")


def validate_metadata_file(args):
    schemas = "https://raw.githubusercontent.com/charlestati/schema-org-json-schemas/89ed608d2bd03548c10a086c58af27090920bcc5/schemas"
    remote_schemas = schemas.startswith("https://") or schemas.startswith("http://")
    prefix = "schema:"

    @cache
    def retrieve_schema(uri):
        path = f"{schemas}/{uri.removeprefix(prefix)}.schema.json"
        if remote_schemas:
            response = requests.get(path)
            if response.status_code != 200:
                print(f"Error while retrieving schema {uri}: {response.text}")
                exit(-1)
            schema = response.json()
        else:
            file = pathlib.Path(path)
            if not file.is_file():
                print(f"Error while retrieving schema {uri}: {file} is not a file")
                exit(-1)
            schema = json.loads(file.read_text())
        return Resource.opaque(schema)

    registry = Registry(retrieve=retrieve_schema)
    schema = retrieve_schema("Dataset").contents
    json_ld = json.loads(args.metadata.read_text())

    v = jsonschema.Draft202012Validator(schema, registry=registry)
    # sort by json_path (a string): error paths themselves (deques) are not orderable
    errors = sorted(v.iter_errors(json_ld), key=lambda e: e.json_path)
    if not len(errors):
        if args.verbose:
            print("Ok!")
        return
    print("Validation error:")
    for i, error in enumerate(errors):
        print(f"{i+1}) {error.json_path} \n {error.message}")
    exit(-1)


def create_files_file(args):
    def search_files(_path: pathlib.Path):
        files = []
        for path in _path.iterdir():
            if path.is_file():
                files.append(
                    {
                        "kind": "file",
                        "path": path.name,
                        "sha256": get_checksum(path.read_bytes()),
                    }
                )
            else:
                files.append(
                    {
                        "kind": "directory",
                        "path": path.name,
                        "files": search_files(path),
                    }
                )
            if args.verbose:
                print(f"Path '{path}' processed")
        return files

    args.output.write_text(
        json.dumps(
            {
                "kind": "directory",
                "path": args.files.name,
                "files": search_files(args.files),
            },
            ensure_ascii=False,
            indent=2,
        )
    )
    if args.verbose:
        print("Done!")


def cli(args):
    if args.subparser == "manifest":
        generate_manifest(args)
    elif args.subparser == "upload":
        # propagate the error code from upload_files (it returns 1 on failure)
        return upload_files(args) or 0
    elif args.subparser == "metadata":
        validate_metadata_file(args)
    elif args.subparser == "files":
        create_files_file(args)
    return 0


if __name__ == "__main__":
    sys.exit(cli(parse_arguments()))
19
controller/src/datasetcmp/config.py
Normal file

@@ -0,0 +1,19 @@
# ============================================================
# System: Unified Library, AI Centre, HSE University
# Module: DatasetComponent
# Authors: Polezhaev V.A., Klimin N.A., Khritankov A.S.
# Created: 2025
# ============================================================
import os

# only the literal 'True' enables the permissions check; any other value disables it
UNIP_DATASET_CMP_CHECK_CONTROLLER_PERMISSIONS = \
    os.getenv('UNIP_DATASET_CMP_CHECK_CONTROLLER_PERMISSIONS', 'True') == 'True'

# only the literal 'True' enables validation; any other value disables it
UNIP_DATASET_CMP_VALIDATE = os.getenv('UNIP_DATASET_CMP_VALIDATE', 'True') == 'True'
26
controller/src/datasetcmp/files.py
Normal file

@@ -0,0 +1,26 @@
# ============================================================
# System: Unified Library, AI Centre, HSE University
# Module: DatasetComponent
# Authors: Klimin N.A., Polezhaev V.A., Khritankov A.S.
# Created: 2025
# ============================================================
from logging import Logger

import requests
from requests.auth import HTTPBasicAuth

from datasetcmp.utils import settings


def get_location(username_password: tuple[str, str], app: str, box: str, location: str, logger: Logger):
    url = settings.files_api_base_url + f"/{app}/files/{box}/{location}"
    auth = HTTPBasicAuth(username_password[0], username_password[1])
    response = requests.get(url, auth=auth)
    logger.info(f"Response from '{url}' with status '{response.status_code}'")
    return response


def get_content(url: str, logger: Logger) -> bytes:
    response = requests.get(url)
    logger.info(f"Response from '{url}' with status '{response.status_code}'")
    return response.content
556
controller/src/datasetcmp/handlers.py
Normal file

@@ -0,0 +1,556 @@
# ============================================================
# System: Unified Library, AI Centre, HSE University
# Module: DatasetComponent
# Authors: Polezhaev V.A., Klimin N.A., Khritankov A.S.
# Created: 2025
# ============================================================
import base64
import urllib.parse

import kopf
from kubernetes.client import ApiClient, NetworkingV1Api, V1HTTPIngressPath, V1IngressBackend, V1IngressServiceBackend, \
    V1ServiceBackendPort, V1IngressRule, V1HTTPIngressRuleValue, AuthorizationV1Api, V1SelfSubjectRulesReview, \
    CoreV1Api, ApiException, V1IngressTLS, V1IngressSpec, V1ObjectMeta, V1Ingress

# DatasetComponent is tightly coupled with APIComponent, so access to its protected helpers is acceptable
# noinspection PyProtectedMember
from apicmp.handlers import _construct_oauth2_proxy_deployment_model as construct_api_cmp_oauth2_proxy_deployment_model, \
    _construct_oauth2_proxy_ingress_model as construct_api_cmp_oauth2_proxy_ingress_model, \
    _construct_oauth2_proxy_service_model as construct_api_cmp_oauth2_proxy_service_model, \
    _construct_oauth2_proxy_np as construct_api_cmp_oauth2_proxy_np, \
    join_url_segments, _create_or_update_oauth2_proxy_ingress, _create_or_update_oauth2_proxy_deployment, \
    _create_or_update_oauth2_proxy_service, _create_or_update_oauth2_proxy_np, _get_oidc_from_spec, \
    _get_basic_from_spec
from auth import get_basic_auth_credentials
from basic_resources.deployments import delete_deployment_if_exists
from basic_resources.ingresses import delete_ingress_if_exists, read_ingress_if_exists
from basic_resources.network_policies import delete_network_policy_if_exists
from basic_resources.services import delete_svc_if_exists
from bound import not_dev_namespace
from cmplink.basic_auth_link import create_or_update_basic_auth_joint_secret, get_basic_auth_joint_secret_name
from cmplink.keycloak_group_link import get_oauth2_proxy_deployment_name, get_groups_joint_secret_name, \
    create_or_update_groups_joint_secret
from cmplink.link import TARGET_KIND_DATASET_COMPONENT
from config import UNIP_DOMAIN
from datasetcmp.config import UNIP_DATASET_CMP_CHECK_CONTROLLER_PERMISSIONS, UNIP_DATASET_CMP_VALIDATE
from datasetcmp.utils import settings, required_permissions, ProcessingError
from datasetcmp.validation import validate_metadata
from kopf_k8s_client import api_client
from parse import get_user_namespace


def _get_dataset_cmp_ingress_name(name):
    return f'{name}-datasetcmp-ingress'


def _get_oauth2_proxy_ingress_name(name):
    return f'{name}-datasetcmp-oauth2-ingress'


def _get_oauth2_proxy_path_prefix(namespace, upstream_name):
    return '/' + namespace + '/proxies/datasetcmp-' + upstream_name


def _get_oauth2_proxy_path(namespace, upstream_name):
    return _get_oauth2_proxy_path_prefix(namespace, upstream_name) + '(/|$)(.*)'


def _get_oauth2_proxy_service_name(name):
    return f'{name}-datasetcmp-oauth2-svc'


def _get_oauth2_proxy_selector_label(name):
    return f'{name}-datasetcmp-oauth2-proxy'


def _get_oauth2_proxy_np_name(name):
    return f'datasetcmp-ingress-{name}-oauth2-traffic'


def _construct_oauth2_proxy_ingress_model(name: str, namespace: str):
    ingress_model = construct_api_cmp_oauth2_proxy_ingress_model(name, namespace)
    ingress_model['name'] = _get_oauth2_proxy_ingress_name(name)
    ingress_model['path'] = _get_oauth2_proxy_path(namespace, name)
    ingress_model['service_name'] = _get_oauth2_proxy_service_name(name)

    return ingress_model


def _construct_oauth2_proxy_deployment_model(name: str, namespace: str, oidc: dict):
    deployment_model = construct_api_cmp_oauth2_proxy_deployment_model(name, namespace, oidc)

    base_url = 'https://' + UNIP_DOMAIN

    proxy_path_prefix = _get_oauth2_proxy_path_prefix(namespace, name)
    redirect_url_parts = [proxy_path_prefix, 'oauth2/callback']
    redirect_url = join_url_segments(redirect_url_parts)
    redirect_url = urllib.parse.urljoin(base_url, redirect_url)

    proxy_prefix_parts = [proxy_path_prefix, 'oauth2']
    proxy_prefix = '/' + join_url_segments(proxy_prefix_parts)

    deployment_model['deployment_name'] = get_oauth2_proxy_deployment_name(name, TARGET_KIND_DATASET_COMPONENT)
    deployment_model['selector_label'] = _get_oauth2_proxy_selector_label(name)
    deployment_model['redirect_url'] = redirect_url
    deployment_model['proxy_prefix'] = proxy_prefix
    deployment_model['groups_secret_name'] = get_groups_joint_secret_name(name, TARGET_KIND_DATASET_COMPONENT)

    return deployment_model


def _construct_oauth2_proxy_service_model(name: str, namespace: str):
    service_model = construct_api_cmp_oauth2_proxy_service_model(name, namespace)
    service_model['selector_label'] = _get_oauth2_proxy_selector_label(name)
    service_model['service_name'] = _get_oauth2_proxy_service_name(name)

    return service_model


def _construct_oauth2_proxy_np(name: str, namespace: str):
    np_model = construct_api_cmp_oauth2_proxy_np(name, namespace)
    np_model['np_name'] = _get_oauth2_proxy_np_name(name)
    np_model['selector_label'] = _get_oauth2_proxy_selector_label(name)
    return np_model


def _delete_oauth2_proxy_ingress_if_exists(api_client_: ApiClient, name: str, namespace: str,
                                           logger):
    ingress_name = _get_oauth2_proxy_ingress_name(name)
    delete_ingress_if_exists(api_client=api_client_, name=ingress_name, namespace=namespace, logger=logger)


def _delete_oauth2_proxy_deployment_if_exists(api_client_: ApiClient, name: str, namespace: str, logger):
    deployment_name = get_oauth2_proxy_deployment_name(name, TARGET_KIND_DATASET_COMPONENT)
    delete_deployment_if_exists(api_client_, namespace, deployment_name, logger)


def _delete_oauth2_proxy_service_if_exists(api_client_: ApiClient, name: str, namespace: str, logger):
    service_name = _get_oauth2_proxy_service_name(name)
    delete_svc_if_exists(api_client_, namespace, service_name, logger)


def _delete_oauth2_proxy_np_if_exists(api_client_: ApiClient, name: str, namespace: str, logger):
    np_name = _get_oauth2_proxy_np_name(name)
    delete_network_policy_if_exists(api_client_, np_name, namespace, logger)


def _should_oauth2_proxy_be_created(new_oidc):
    if new_oidc is None:
        return False
    return new_oidc.get('enabled', True)


def _update_oauth2_proxy(dataset_cmp_name, namespace, new_oidc, logger):
    create_oauth2_proxy = _should_oauth2_proxy_be_created(new_oidc)

    @kopf.subhandler(id=f'update-oauth2-proxy-ingress-{dataset_cmp_name}')
    def _update_oauth2_proxy_ingress_handler(**_):
        _delete_oauth2_proxy_ingress_if_exists(api_client, dataset_cmp_name, namespace, logger)
        if create_oauth2_proxy:
            ingress_name = _get_oauth2_proxy_ingress_name(dataset_cmp_name)
            ingress_model = _construct_oauth2_proxy_ingress_model(dataset_cmp_name, namespace)
            _create_or_update_oauth2_proxy_ingress(api_client, ingress_name, namespace, ingress_model,
                                                   logger)

    @kopf.subhandler(id=f'update-oauth2-proxy-deployment-{dataset_cmp_name}')
    def _update_oauth2_proxy_deployment_handler(**_):
        _delete_oauth2_proxy_deployment_if_exists(api_client, dataset_cmp_name, namespace, logger)
        if create_oauth2_proxy:
            # pass the kind explicitly so the name matches the one used by the delete helper above
            deployment_name = get_oauth2_proxy_deployment_name(dataset_cmp_name, TARGET_KIND_DATASET_COMPONENT)
            deployment_model = _construct_oauth2_proxy_deployment_model(dataset_cmp_name, namespace, new_oidc)
            user_namespace = get_user_namespace(namespace)
            create_or_update_groups_joint_secret(api_client, dataset_cmp_name, namespace, user_namespace, logger,
                                                 None, TARGET_KIND_DATASET_COMPONENT)
            _create_or_update_oauth2_proxy_deployment(api_client, deployment_name, namespace, deployment_model, logger)

    @kopf.subhandler(id=f'update-oauth2-proxy-service-{dataset_cmp_name}')
    def _update_oauth2_proxy_service_handler(**_):
        _delete_oauth2_proxy_service_if_exists(api_client, dataset_cmp_name, namespace, logger)
        if create_oauth2_proxy:
            service_name = _get_oauth2_proxy_service_name(dataset_cmp_name)
            service_model = _construct_oauth2_proxy_service_model(dataset_cmp_name, namespace)
            _create_or_update_oauth2_proxy_service(api_client, service_name, namespace, service_model, logger)

    @kopf.subhandler(id=f'update-oauth2-proxy-np-{dataset_cmp_name}')
    def _update_oauth2_proxy_np_handler(**_):
        _delete_oauth2_proxy_np_if_exists(api_client, dataset_cmp_name, namespace, logger)
        if create_oauth2_proxy:
            np_name = _get_oauth2_proxy_np_name(dataset_cmp_name)
            np_model = _construct_oauth2_proxy_np(dataset_cmp_name, namespace)
            _create_or_update_oauth2_proxy_np(api_client, np_name, namespace, np_model, logger)


def _delete_oauth2_proxy(api_cmp_name, namespace, logger):
    _delete_oauth2_proxy_ingress_if_exists(api_client, api_cmp_name, namespace, logger)
    _delete_oauth2_proxy_deployment_if_exists(api_client, api_cmp_name, namespace, logger)
    _delete_oauth2_proxy_service_if_exists(api_client, api_cmp_name, namespace, logger)
    _delete_oauth2_proxy_np_if_exists(api_client, api_cmp_name, namespace, logger)


@kopf.on.create('unified-platform.cs.hse.ru', 'datasetcomponents', when=not_dev_namespace)
def create_dataset_component(name, namespace, spec, patch, logger, **_):
    oidc = _get_oidc_from_spec(spec)
    user_namespace = get_user_namespace(namespace)

    _update_oauth2_proxy(name, namespace, oidc, logger)

    @kopf.subhandler(id=f'create-or-update-ba-joint-secret-for-{namespace}.{name}')
    def _create_or_update_ba_joint_secret_handler(**_):
        create_or_update_basic_auth_joint_secret(api_client=api_client,
                                                 target=name,
                                                 target_namespace=namespace,
                                                 user_namespace=user_namespace,
                                                 logger=logger,
                                                 exclude_direct_spec=False,
                                                 target_kind=TARGET_KIND_DATASET_COMPONENT)


@kopf.on.delete('unified-platform.cs.hse.ru', 'datasetcomponents', when=not_dev_namespace)
def delete_dataset_component(spec, name, namespace, logger, **_):
    user_namespace = get_user_namespace(namespace)
    _delete_oauth2_proxy(name, namespace, logger)

    create_or_update_basic_auth_joint_secret(api_client=api_client,
                                             target=name,
                                             target_namespace=namespace,
                                             user_namespace=user_namespace,
                                             logger=logger,
                                             exclude_direct_spec=True,
                                             target_kind=TARGET_KIND_DATASET_COMPONENT)


@kopf.on.update('unified-platform.cs.hse.ru', 'datasetcomponents',
                when=not_dev_namespace, field='spec')
def update_dataset_component(name, namespace, old, new, patch, logger, **_):
    old_spec = old if old else {}
    spec = new
    oidc = _get_oidc_from_spec(spec)
    basic = _get_basic_from_spec(spec)

    _update_oauth2_proxy(name, namespace, oidc, logger)

    @kopf.subhandler(id=f'create-or-update-joint-ba-secret-for-{namespace}.{name}', retries=3)
    def _create_or_update_ba_joint_secret_handler(**_):
        # in theory, this could be split out into a separate @kopf.on.field handler

        old_api = old_spec.get('restfulApi', {})
        old_auth = old_api.get('auth', {})
        old_basic = old_auth.get('basic')
        old_secret = old_basic.get('credentials') if old_basic else None

        api = spec['restfulApi']
        auth = api['auth']
        new_basic = auth.get('basic')
        new_secret = new_basic.get('credentials') if new_basic else None

        if old_secret != new_secret:
            user_namespace = get_user_namespace(namespace)
            create_or_update_basic_auth_joint_secret(api_client=api_client,
                                                     target=name,
                                                     target_namespace=namespace,
                                                     user_namespace=user_namespace,
                                                     logger=logger,
                                                     exclude_direct_spec=False,
                                                     target_kind=TARGET_KIND_DATASET_COMPONENT)


# todo: use Jinja templates instead of client objects for datasetcmp (here and below)
#  - for consistency with the rest of the controller (see the MLComponent and APIComponent controllers)
def to_ingress_host_path(path, path_type="Exact"):
    return V1HTTPIngressPath(
        backend=V1IngressBackend(
            service=V1IngressServiceBackend(
                name=settings.ingress_backend_service_name,
                port=V1ServiceBackendPort(
                    number=int(settings.ingress_backend_service_port)
                ),
            )
        ),
        path=path,
        path_type=path_type,
    )


def _should_api_be_published(spec):
    basic = _get_basic_from_spec(spec)
    basic_enabled = basic.get('enabled', True) if basic else False
    oidc = _get_oidc_from_spec(spec)
    oidc_enabled = oidc.get('enabled', True) if oidc else False
    if basic_enabled or oidc_enabled:
        return True
    # for now we assume that if every authentication method is disabled, access is closed
    return False


def _append_auth_annotations(annotations, name, namespace, basic, oidc):
    if basic and basic.get('enabled', True):
        annotations["nginx.ingress.kubernetes.io/auth-type"] = "basic"
        annotations["nginx.ingress.kubernetes.io/auth-secret"] = \
            get_basic_auth_joint_secret_name(name, TARGET_KIND_DATASET_COMPONENT)
    if oidc and oidc.get('enabled', True):
        annotations["nginx.ingress.kubernetes.io/satisfy"] = "any"
        base_url = 'https://$host'
        proxy_path_prefix = _get_oauth2_proxy_path_prefix(namespace, name)
        auth_url = base_url + proxy_path_prefix + '/oauth2/auth'
        auth_signin = base_url + proxy_path_prefix + '/oauth2/start?rd=$escaped_request_uri'
        annotations["nginx.ingress.kubernetes.io/auth-url"] = auth_url
        annotations["nginx.ingress.kubernetes.io/auth-signin"] = auth_signin


def _construct_ingress_model(name: str, namespace: str, rules, files_basic_creds: str,
                             basic: dict | None, oidc: dict | None):
    configuration_snippet = f"""\
proxy_set_header Authorization "{files_basic_creds}";
proxy_pass_header Authorization;
proxy_pass_header Content-Type;
if ($request_method != GET) {{
    return 403;
}}
rewrite ^(.+?)/datasets/(.+) $1/files/$2 break;\
"""

    annotations = {
        "nginx.ingress.kubernetes.io/use-regex": "true",
        "nginx.ingress.kubernetes.io/configuration-snippet": configuration_snippet,
        "nginx.ingress.kubernetes.io/auth-realm": "Authentication Required - datasets",
    }

    _append_auth_annotations(annotations, name, namespace, basic, oidc)

    return V1Ingress(
        api_version="networking.k8s.io/v1",
        kind="Ingress",
        metadata=V1ObjectMeta(
            name=_get_dataset_cmp_ingress_name(name),
            namespace=namespace,
            annotations=annotations
        ),
        spec=V1IngressSpec(
            ingress_class_name="nginx",
            rules=rules,
            tls=[V1IngressTLS(hosts=[settings.ingress_rule_host])],
        ),
    )


def _construct_rules_patch(app: str, rules, dataset_spec, operation):
    box: str = dataset_spec["box"]
    rule_base_path = f"/{app}/datasets/{box}"
    if operation == "update":
        data_path = f"{rule_base_path}/data"
        datacmp_ingress_paths = {rule_base_path: to_ingress_host_path(data_path, "ImplementationSpecific")}
        for version in dataset_spec["versions"]:
            version_name: str = version["name"]
            version_path = f"{rule_base_path}/{version_name}"
            datacmp_ingress_paths[f"{rule_base_path}/{version_name}"] = \
                to_ingress_host_path(version_path, "ImplementationSpecific")
        if len(rules):
            # drop paths of versions that no longer exist in the spec
            for j, path in reversed(list(enumerate(rules[0].http.paths))):
                if path.path.startswith(rule_base_path) and path.path not in datacmp_ingress_paths:
                    rules[0].http.paths.pop(j)
            rule = rules[0]
            # refresh paths that are still present, then append the new ones
            for j, path in enumerate(rule.http.paths):
                if path.path in datacmp_ingress_paths:
                    rules[0].http.paths[j] = datacmp_ingress_paths.pop(path.path)
            for new_path in datacmp_ingress_paths.values():
                rules[0].http.paths.append(new_path)
        else:
            rules.append(
                V1IngressRule(
                    host=settings.ingress_rule_host,
                    http=V1HTTPIngressRuleValue(
                        paths=list(datacmp_ingress_paths.values())
                    ),
                )
            )
    elif operation == "remove":
        rule = rules[0]
        for j, path in reversed(list(enumerate(rule.http.paths))):
            if path.path.startswith(rule_base_path):
                rules[0].http.paths.pop(j)
        if len(rules[0].http.paths) == 0:
            return []
    return rules


def _construct_auth_annotations_patch(name, namespace, basic: dict | None, oidc: dict | None):
    annotations = {"nginx.ingress.kubernetes.io/auth-type": None,
                   "nginx.ingress.kubernetes.io/auth-secret": None,
                   "nginx.ingress.kubernetes.io/satisfy": None,
                   "nginx.ingress.kubernetes.io/auth-url": None,
                   "nginx.ingress.kubernetes.io/auth-signin": None}

    _append_auth_annotations(annotations, name, namespace, basic, oidc)

    patch = {"metadata": {"annotations": annotations}}
    return patch


def _get_controller_permissions(namespace: str = None):
    auth_v1_api = AuthorizationV1Api(api_client)

    response: V1SelfSubjectRulesReview = \
        auth_v1_api.create_self_subject_rules_review(body={"spec": {"namespace": namespace}})

    output = {}
    for rule in response.status.resource_rules:
        for api_group in rule.api_groups:
            for resource in rule.resources:
                output.setdefault(f"{api_group}:{resource}", set()).update(set(rule.verbs))

    return output


def _get_files_username_password(namespace, spec):
|
||||
core_v1_api = CoreV1Api(api_client)
|
||||
auth_secret = core_v1_api.read_namespaced_secret(spec["filesApiSecret"], namespace)
|
||||
secret_creds = list(auth_secret.data.items())[0] # creds ~ (auth, <user:password rows in base64>)
|
||||
credentials_lines = secret_creds[1]
|
||||
username, password = get_basic_auth_credentials(credentials_lines)
|
||||
return username, password
|
||||
|
||||
|
||||
def _username_password_to_basic_creds(username_password):
|
||||
username, password = username_password
|
||||
basic_auth_creds = f'{username}:{password}'
|
||||
basic_auth_creds = 'Basic ' + base64.b64encode(basic_auth_creds.encode('utf-8')).decode('ascii')
|
||||
return basic_auth_creds
|
||||
|
||||
|
||||
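A minimal sanity-check sketch (not part of the source tree) of the header value `_username_password_to_basic_creds` would produce; the credentials are made up:

```python
import base64

# hypothetical credentials, for illustration only
creds = 'Basic ' + base64.b64encode(b'alice:secret').decode('ascii')
assert creds == 'Basic YWxpY2U6c2VjcmV0'
```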
def _create_ingress(name, namespace, spec, files_username_password, logger):
    networking_v1_api = NetworkingV1Api(api_client)
    rules = _construct_rules_patch(namespace, [], spec, "update")
    restful_api_auth = spec["restfulApi"]["auth"]
    basic = restful_api_auth.get('basic')
    oidc = restful_api_auth.get('oidc')

    files_basic_creds = _username_password_to_basic_creds(files_username_password)

    ingress_model = _construct_ingress_model(name, namespace, rules,
                                             files_basic_creds=files_basic_creds,
                                             basic=basic, oidc=oidc)

    networking_v1_api.create_namespaced_ingress(namespace=namespace, body=ingress_model)
    logger.info(f"Ingress for DatasetComponent '{name}' created")


def _patch_ingress_rules(name, namespace, ingress_name, ingress, rules, logger):
    networking_v1_api = NetworkingV1Api(api_client)
    ingress.spec.rules = rules
    networking_v1_api.patch_namespaced_ingress(name=ingress_name, namespace=namespace, body=ingress)
    logger.info(f"Ingress for DatasetComponent '{name}' patched")


def _patch_ingress_auth(name, namespace, spec, ingress_name, logger):
    networking_v1_api = NetworkingV1Api(api_client)
    restful_api_auth = spec["restfulApi"]["auth"]
    basic = restful_api_auth.get('basic')
    oidc = restful_api_auth.get('oidc')
    anno_patch = _construct_auth_annotations_patch(name, namespace, basic, oidc)
    networking_v1_api.patch_namespaced_ingress(name=ingress_name, namespace=namespace, body=anno_patch)
    # todo: as an optimization this could be merged with the patch above
    # (the approach used in _patch_ingress_rules does not work for patching metadata);
    # combine them into a single shared method, by analogy with create_ingress
    logger.info(f"Ingress for DatasetComponent '{name}' patched")


def _check_required_permissions(namespace: str = None):
    permissions = _get_controller_permissions(namespace)
    for resource, verbs in required_permissions.items():
        if resource not in permissions:
            raise ProcessingError(f"Insufficient permissions for resource '{resource}'")
        existing_verbs = permissions[resource]
        for verb in verbs:
            if verb not in existing_verbs:
                raise ProcessingError(f"Insufficient permissions for resource '{resource}' verb '{verb}'")


def _check_controller_has_permissions(namespace, logger):
    if not UNIP_DATASET_CMP_CHECK_CONTROLLER_PERMISSIONS:
        return
    try:
        _check_required_permissions(namespace)
    except ProcessingError as exc:
        msg = exc.args[0]
        logger.error(msg)
        raise kopf.TemporaryError(msg)
    except ApiException as exc:
        msg = f"Got error while retrieving permissions for namespace '{namespace}'"
        logger.error(msg)
        raise kopf.TemporaryError(msg)


def _check_metadata(spec, name, namespace, logger, username_password):
    if not UNIP_DATASET_CMP_VALIDATE:
        logger.info('Metadata validation is disabled in controller')
        return
    try:
        validate_metadata(namespace, username_password, spec, logger)
    except ProcessingError as e:
        logger.warning(f"DatasetComponent '{name}' has invalid metadata")
        return {
            "metadataStatus": e.args[0],
            "ingressStatus": "Unknown",
        }


# DatasetComponent handlers are kept separate (instead of one shared on.create, on.update, ...)
# because the Ingress setup for DatasetComponent was originally implemented in a different project;
@kopf.on.create("unified-platform.cs.hse.ru", "v1", "datasetcomponents")
@kopf.on.update("unified-platform.cs.hse.ru", "v1", "datasetcomponents")
def set_dataset_component_rules(spec, name: str, namespace: str, logger, **_):
    _check_controller_has_permissions(namespace, logger)

    files_username_password = _get_files_username_password(namespace, spec)
    ingress_name = _get_dataset_cmp_ingress_name(name)

    error_status = _check_metadata(spec, name, namespace, logger, files_username_password)
    if error_status:
        return error_status
    metadata_status = "OK"
    ingress_status = "Unknown"

    ingress = read_ingress_if_exists(api_client, ingress_name, namespace, logger)
    published = _should_api_be_published(spec)

    if ingress is None and published:
        _create_ingress(name, namespace, spec, files_username_password, logger)
        ingress_status = "Created"
    if ingress is None and not published:
        logger.info(f"Auth for DatasetComponent '{name}' disabled, ingress not created")
        ingress_status = "NotCreated"
    if ingress and published:
        rules = _construct_rules_patch(namespace, ingress.spec.rules, spec, "update")
        _patch_ingress_rules(name, namespace, ingress_name, ingress, rules, logger)
        _patch_ingress_auth(name, namespace, spec, ingress_name, logger)
        ingress_status = "Patched"
    if ingress and not published:
        # if publish == False, the ingress must be deleted no matter what
        delete_ingress_if_exists(api_client, ingress_name, namespace, logger)
        ingress_status = "Deleted"

    return {
        "metadataStatus": metadata_status,
        "ingressStatus": ingress_status,
    }


@kopf.on.delete("unified-platform.cs.hse.ru", "v1", "datasetcomponents")
def unset_dataset_component_rules(spec, name: str, namespace: str, logger, **_):
    _check_controller_has_permissions(namespace, logger)

    ingress_name = _get_dataset_cmp_ingress_name(name)
    ingress = read_ingress_if_exists(api_client, ingress_name, namespace, logger)
    if not ingress:
        return

    published = _should_api_be_published(spec)
    rules = _construct_rules_patch(namespace, ingress.spec.rules, spec, "remove")
    if rules and published:
        # this matches the previously developed logic, adjusted to handle the publish condition
        _patch_ingress_rules(name, namespace, ingress_name, ingress, rules, logger)
        _patch_ingress_auth(name, namespace, spec, ingress_name, logger)
    else:
        delete_ingress_if_exists(api_client, ingress_name, namespace, logger)
26
controller/src/datasetcmp/utils.py
Normal file
@@ -0,0 +1,26 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: DatasetComponent
# Authors: N. Klimin, V. Polezhaev, A. Khritankov
# Created: 2025
# ============================================================
from pydantic_settings import BaseSettings
# todo: configure with plain Python, do not use pydantic_settings


class ProcessingError(Exception):  # subclass Exception, not BaseException, so generic handlers can catch it
    pass


class Settings(BaseSettings):
    files_api_base_url: str = "https://platform-test.stratpro.hse.ru"
    ingress_rule_host: str = "platform-test.stratpro.hse.ru"
    ingress_backend_service_name: str = "files-svc"
    ingress_backend_service_port: int = 80


settings = Settings()
required_permissions = {
    ":secrets": {"get"},
    "networking.k8s.io:ingresses": {"get", "create", "update", "patch", "delete"},
}
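pydantic_settings resolves each Settings field from an environment variable of the same name (case-insensitive by default), so deployments can override the defaults without code changes; a minimal sketch with a hypothetical host:

```python
import os

os.environ["INGRESS_RULE_HOST"] = "platform.example.org"  # hypothetical override

from datasetcmp.utils import Settings  # must run before the module is first imported

assert Settings().ingress_rule_host == "platform.example.org"
```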
118
controller/src/datasetcmp/validation.py
Normal file
@@ -0,0 +1,118 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: DatasetComponent
# Authors: N. Klimin, V. Polezhaev, A. Khritankov
# Created: 2025
# ============================================================
import hashlib
import json
import pathlib
from functools import cache

import jsonschema
import referencing
import requests

from datasetcmp.files import get_content, get_location
from datasetcmp.utils import ProcessingError


def validate_checksum(data, hash_sum: str):
    return hashlib.sha256(data).hexdigest() == hash_sum
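For illustration, a self-contained check of the validate_checksum semantics (the payload here is made up):

```python
import hashlib

payload = b'{"@type": "Dataset"}'  # made-up content, for illustration only
digest = hashlib.sha256(payload).hexdigest()

assert validate_checksum(payload, digest)
assert not validate_checksum(payload, "0" * 64)
```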
_DEFAULT_SCHEMA = "https://raw.githubusercontent.com/charlestati/schema-org-json-schemas/89ed608d2bd03548c10a086c58af27090920bcc5/schemas"


def validate_metadata_file(json_ld: dict, logger, schemas: str = _DEFAULT_SCHEMA):
    remote_schemas = schemas.startswith("https://") or schemas.startswith("http://")
    prefix = "schema:"

    @cache
    def retrieve_schema(uri: str):
        path = f"{schemas}/{uri.removeprefix(prefix)}.schema.json"
        if remote_schemas:
            response = requests.get(path)
            if response.status_code != 200:
                raise ProcessingError(f"Internal error. Error while retrieving schema {uri}: {response.text}")
            schema_ = response.json()
        else:
            file = pathlib.Path(path)
            if not file.is_file():
                raise ProcessingError(f"Internal error. Error while retrieving schema {uri}: {file} is not a file")
            schema_ = json.loads(file.read_text())
        return referencing.Resource.opaque(schema_)

    registry = referencing.Registry(retrieve=retrieve_schema)
    schema = retrieve_schema("Dataset").contents

    v = jsonschema.Draft202012Validator(schema, registry=registry)
    errors = sorted(v.iter_errors(json_ld), key=lambda e: e.path)
    if len(errors):
        raise ProcessingError(f"Metadata file is invalid, "
                              f"validation errors messages: {[error.message for error in errors]}")


def validate_files(directory: dict, base_path: str, app: str, box: str, version: str,
                   username_password: tuple[str, str], logger):
    directory_path = f"{app}/files/{box}/{version}/{base_path}{directory['path']}"
    files = set()
    for file in directory["files"]:
        if file["kind"] == "directory":
            validate_files(file, base_path + f"{directory['path']}/", app, box, version, username_password, logger)
            continue
        files.add(f"{directory_path}/{file['path']}")

    location_str = f"{version}/{base_path}/{directory['path']}/"
    location = get_location(username_password=username_password, app=app, box=box,
                            location=location_str, logger=logger)

    if location.status_code != 200:
        raise ProcessingError(f"Location '{location_str}' is inaccessible")

    for file in location.json()["files"]:
        if file["name"] not in files:
            logger.warning(f"Extra file found in location '{location_str}'")
        else:
            files.remove(file["name"])

    if len(files) > 5:
        files_str = "', '".join(sorted(files)[:5])
        raise ProcessingError(f"Location '{location_str}' missing {len(files)} files, first 5: '{files_str}'")
    elif len(files) != 0:
        files_str = "', '".join(sorted(files))
        raise ProcessingError(f"Location '{location_str}' missing some files: '{files_str}'")


def validate_metadata(app: str, username_password: tuple[str, str], spec, logger):
    box: str = spec["box"]
    for version in spec["versions"]:
        version_name: str = version["name"]
        contents: dict = version["contents"]
        metadata_json_ld = {}
        files_json = {}
        for k, v in contents.items():
            path = v["path"]
            hash_sum = v["sha256"]
            location = get_location(username_password=username_password, app=app, box=box,
                                    location=f"{version_name}/{path}", logger=logger)
            if location.status_code != 200:
                raise ProcessingError(f"Location '{version_name}/{path}' is inaccessible")
            presigned_get_url = location.json()["presigned_get_url"]
            content = get_content(presigned_get_url, logger)
            if k == "metadata":
                try:
                    metadata_json_ld = json.loads(content)
                except json.JSONDecodeError:
                    raise ProcessingError(f"Metadata file '{version_name}/{path}' cannot be decoded")
            elif k == "files":
                try:
                    files_json = json.loads(content)
                except json.JSONDecodeError:
                    raise ProcessingError(f"Files file '{version_name}/{path}' cannot be decoded")
            if not validate_checksum(content, hash_sum):
                raise ProcessingError(f"Hashsum for '{version_name}/{path}' is invalid")
        validate_metadata_file(metadata_json_ld, logger)
        # The Files API currently does not return all files for a location and there is no pagination.
        # Waiting for updates.
        # validate_files(files_json, "data", app, box, version_name, username_password, logger)
48
controller/src/exceptions.py
Normal file
@@ -0,0 +1,48 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: Utilities
# Authors: V. Polezhaev, A. Khritankov
# Created: 2024
# ============================================================

from kopf import TemporaryError, PermanentError


class InputValidationPermanentError(PermanentError):
    pass


class InputValidationTemporaryError(TemporaryError):
    pass


class RelatedObjectNotReadyTemporaryError(TemporaryError):
    pass


class ObjectAlreadyExistsTemporaryError(TemporaryError):
    pass


class ObjectDoesNotExistTemporaryError(TemporaryError):
    pass


class ObjectDoesNotExistPermanentError(PermanentError):
    pass


class PlatformServicePermanentError(PermanentError):
    pass


class PlatformServiceTemporaryError(TemporaryError):
    pass


class InternalErrorPermanentError(PermanentError):
    pass


class RelatedObjectValuePermanentError(PermanentError):
    pass
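kopf retries a handler that raises TemporaryError (optionally after a delay) and gives up on PermanentError, so these subclasses let handlers signal retryability by exception type alone. A minimal sketch with a hypothetical resource and handler:

```python
import kopf

from exceptions import RelatedObjectNotReadyTemporaryError


@kopf.on.create("unified-platform.cs.hse.ru", "v1", "examples")  # hypothetical resource
def ensure_related_object(spec, **_):
    if not spec.get("relatedReady"):
        # kopf will re-invoke the handler after roughly the given delay
        raise RelatedObjectNotReadyTemporaryError("related object is not ready", delay=30)
```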
0
controller/src/exp_pipeline/__init__.py
Normal file
400
controller/src/exp_pipeline/api.py
Normal file
@@ -0,0 +1,400 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: ExperimentPipeline
# Authors: V. Polezhaev, A. Khritankov
# Created: 2024
# ============================================================
import logging
import urllib.parse
from contextlib import asynccontextmanager

from fastapi import FastAPI, Depends, HTTPException
from httpx import AsyncClient
from jsonschema.validators import Draft202012Validator
from kubernetes_asyncio.client import ApiClient
from sqlalchemy.ext.asyncio import AsyncSession
from starlette.responses import Response

from auth import get_user_id, allowed_to_platform_user
from config import UNIP_DOMAIN
from exp_pipeline.logs import configure_logging
from exp_pipeline.pipeline import start_trial, get_trial, get_pipeline_resource, \
    list_pipeline_resources, list_trials, add_condition_to_status, get_trial_user_id, OpenAPISpecCache, \
    get_api_resource, set_next_started
from exp_pipeline.schema import CreateTrialRequest, PipelineTrial, \
    PipelineVersionResponse, PipelinesResponse, PipelinesLinks, Link, PipelineLinks, EmbeddedPipeline, \
    PipelinesEmbedded, PipelineResponse, TrialsResponse, TrialsLinks, TrialsEmbedded, CreateStatusConditionRequest
from exp_pipeline.storage.db import init_db, get_db
from kube_config import async_unip_load_kube_config

_http_client: AsyncClient | None = None
_k8s_api_client: ApiClient | None = None

configure_logging()

logger = logging.getLogger(__name__)


@asynccontextmanager
async def _lifespan(app: FastAPI):
    global _http_client, _k8s_api_client
    await async_unip_load_kube_config(logger)
    # init_db is called here rather than in an @app.on_event("startup") handler:
    # Starlette skips on_event handlers when a custom lifespan is supplied
    await init_db()
    _http_client = AsyncClient(timeout=30)
    _k8s_api_client = ApiClient()
    yield
    await _http_client.aclose()
    await _k8s_api_client.close()


app = FastAPI(lifespan=_lifespan)


# alternative per-request variant:
# async def _get_http_client():
#     async with AsyncClient() as http_client:
#         yield http_client
#
# def path_op(http_client: AsyncClient = Depends(_get_http_client))
#     ...


openapi_spec_cache = OpenAPISpecCache.get_spec_cache()


def _validate_create_trial_request(create_trial_request: CreateTrialRequest, openapi_spec: dict):
    components = openapi_spec['components']
    schemas = components['schemas']
    openapi_spec = schemas['CreateTrialRequest']

    validator = Draft202012Validator(openapi_spec)
    request_obj = create_trial_request.model_dump(exclude_none=True)

    if not validator.is_valid(request_obj):
        errors = sorted(validator.iter_errors(request_obj), key=lambda e: e.path)
        errors_strs = []
        errors_model = []
        for error in errors:
            s, m = list(error.schema_path), error.message
            error_struct = {'path': s, 'message': m}
            errors_strs.append('{0}, {1}'.format(s, m))
            nested_errors = []
            # error.context is None unless the error comes from a combinator such as oneOf
            for sub_error in sorted(error.context or [], key=lambda e: e.schema_path):
                s, m = list(sub_error.schema_path), sub_error.message
                errors_strs.append('{0}, {1}'.format(s, m))
                nested_errors.append({'path': s, 'message': m})
            error_struct['sub'] = nested_errors
            errors_model.append(error_struct)
        error_msg = '\n'.join(errors_strs)
        logger.info(f'Validation error: {error_msg}')
        raise HTTPException(status_code=400, detail=errors_model)
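With this shape, a failed validation yields an HTTP 400 whose detail is the list of error structs (FastAPI wraps it under "detail"); an illustrative payload with made-up values:

```python
# illustrative only: one top-level error with no nested oneOf sub-errors
detail = [
    {
        "path": ["properties", "inputs", "type"],
        "message": "'42' is not of type 'array'",
        "sub": [],
    },
]
```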
async def start_trial_op(app_name: str,
                         pipeline_name: str,
                         create_trial_request: CreateTrialRequest,
                         db: AsyncSession,
                         user_id: str):
    pipeline_resource = await get_pipeline_resource(api_client=_k8s_api_client,
                                                    app_name=app_name,
                                                    pipeline_name=pipeline_name)
    api_resource = await get_api_resource(api_client=_k8s_api_client,
                                          app_name=app_name,
                                          pipeline_name=pipeline_name)

    openapi_spec = await openapi_spec_cache.get_pipeline_api_spec(k8s_api_client=_k8s_api_client,
                                                                  app_name=app_name,
                                                                  api_resource=api_resource)

    _validate_create_trial_request(create_trial_request, openapi_spec)

    trial = await start_trial(db=db,
                              http_client=_http_client,
                              api_client=_k8s_api_client,
                              app_name=app_name,
                              user_id=user_id,
                              pipeline_name=pipeline_name,
                              api_resource=api_resource,
                              pipeline_resource=pipeline_resource,
                              trial_inputs=create_trial_request.inputs,
                              trial_output_vars=create_trial_request.output_vars)

    return trial


@app.post("/{app_name}/pipelines/{pipeline_name}/trials", response_model=PipelineTrial)
async def post_trial_op(app_name: str,
                        pipeline_name: str,
                        create_trial_request: CreateTrialRequest,
                        db: AsyncSession = Depends(get_db),
                        user_id: str = Depends(get_user_id)):
    trial = await start_trial_op(app_name=app_name,
                                 pipeline_name=pipeline_name,
                                 create_trial_request=create_trial_request,
                                 db=db,
                                 user_id=user_id)

    return trial


@app.post("/{app_name}/pipelines/{pipeline_name}/trials/continue",
          response_model=PipelineTrial,
          dependencies=[Depends(allowed_to_platform_user)])
async def post_next_trial_op(app_name: str,
                             pipeline_name: str,
                             next_pipeline_name: str,
                             tracking_id: str,
                             create_trial_request: CreateTrialRequest,
                             db: AsyncSession = Depends(get_db)):
    user_id = await get_trial_user_id(db=db,
                                      app_name=app_name,
                                      pipeline_name=None,
                                      tracking_id=tracking_id)

    next_trial = await start_trial_op(app_name=app_name,
                                      pipeline_name=next_pipeline_name,
                                      create_trial_request=create_trial_request,
                                      db=db,
                                      user_id=user_id)

    await set_next_started(db=db,
                           tracking_id=tracking_id,
                           app_name=app_name,
                           pipeline_name=pipeline_name,
                           next_tracking_id=next_trial.tracking_id)

    return next_trial


@app.get("/{app_name}/pipelines/{pipeline_name}/check",
         dependencies=[Depends(allowed_to_platform_user)])
async def check_op(app_name: str,
                   pipeline_name: str):
    return Response()


@app.get("/{app_name}/trials/{tracking_id}",
         response_model=PipelineTrial)
async def get_trial_op(app_name: str,
                       tracking_id: str,
                       db: AsyncSession = Depends(get_db),
                       user_id: str = Depends(get_user_id)):
    trial = await get_trial(db=db,
                            app_name=app_name,
                            pipeline_name=None,
                            tracking_id=tracking_id,
                            user_id=user_id)

    return trial


@app.post("/{app_name}/pipelines/{pipeline_name}/trials/{tracking_id}/status/conditions",
          dependencies=[Depends(allowed_to_platform_user)])
async def post_trial_status_condition_op(response: Response,
                                         app_name: str,
                                         pipeline_name: str,
                                         tracking_id: str,
                                         create_condition_request: CreateStatusConditionRequest,
                                         db: AsyncSession = Depends(get_db)):
    modified = await add_condition_to_status(db=db,
                                             tracking_id=tracking_id,
                                             type_=create_condition_request.type,
                                             message=create_condition_request.message,
                                             reason=create_condition_request.reason,
                                             transition_time=create_condition_request.transition_time,
                                             stage=create_condition_request.stage,
                                             app_name=app_name,
                                             pipeline_name=pipeline_name)
    response.status_code = 200 if modified else 204
    return


@app.get('/{app_name}/pipelines/{pipeline_name}/version',
         response_model=PipelineVersionResponse,
         dependencies=[Depends(get_user_id)])
async def get_pipeline_version_op(app_name: str,
                                  pipeline_name: str):
    api_resource = await get_api_resource(api_client=_k8s_api_client,
                                          app_name=app_name,
                                          pipeline_name=pipeline_name)

    try:
        openapi_spec = await openapi_spec_cache.get_pipeline_api_spec(k8s_api_client=_k8s_api_client,
                                                                      app_name=app_name,
                                                                      api_resource=api_resource)
    except Exception as exc:
        logger.exception('OpenAPI specification generation error, '
                         'specification not available', exc_info=exc)
        openapi_spec = None

    return PipelineVersionResponse(openapi_spec=openapi_spec,
                                   license=None)


def _get_list_request_self_link(cursor, limit, uri):
    query_params = []
    if cursor:
        query_params.append(f'cursor={cursor}')
    if limit:
        query_params.append(f'limit={limit}')
    self_href = urllib.parse.urljoin(f'https://{UNIP_DOMAIN}', uri)
    if query_params:
        query = '&'.join(query_params)
        self_href += f'?{query}'

    self_link = Link(href=self_href)
    return self_link


def _get_list_request_next_link(new_cursor, limit_set, uri):
    if new_cursor:
        query_params = [f'cursor={new_cursor}', f'limit={limit_set}']
        query = '&'.join(query_params)
        cursor_href = urllib.parse.urljoin(f'https://{UNIP_DOMAIN}', f'{uri}?{query}')
        cursor_link = Link(href=cursor_href)
    else:
        cursor_link = None
    return cursor_link
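For a hypothetical UNIP_DOMAIN of platform.example.org, these helpers produce HAL-style links such as:

```python
# illustrative only; assumes UNIP_DOMAIN == 'platform.example.org'
_get_list_request_self_link(None, 10, 'myapp/trials')
# -> Link(href='https://platform.example.org/myapp/trials?limit=10')
_get_list_request_next_link(42, 10, 'myapp/trials')
# -> Link(href='https://platform.example.org/myapp/trials?cursor=42&limit=10')
```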
@app.get('/{app_name}/pipelines/{pipeline_name}/trials',
         response_model=TrialsResponse,
         dependencies=[Depends(get_user_id)])
async def get_pipeline_trials_op(app_name: str,
                                 pipeline_name: str,
                                 db: AsyncSession = Depends(get_db),
                                 user_id: str = Depends(get_user_id),
                                 cursor: int | None = None,
                                 limit: int | None = None):
    limit_set = limit or 10
    if limit_set > 30:
        limit_set = 30
    if limit_set < 0:
        limit_set = 10
    trials, new_cursor, counts = await list_trials(db=db,
                                                   app_name=app_name,
                                                   pipeline_name=pipeline_name,
                                                   user_id=user_id,
                                                   cursor=cursor,
                                                   limit=limit_set)

    total_item_count, remaining_item_count = counts

    uri = f'{app_name}/pipelines/{pipeline_name}/trials'
    self_link = _get_list_request_self_link(cursor, limit, uri)
    cursor_link = _get_list_request_next_link(new_cursor, limit_set, uri)

    links = TrialsLinks(self=self_link, next=cursor_link)

    te = TrialsEmbedded(trials=trials)

    resp = TrialsResponse(_links=links,
                          total_item_count=total_item_count,
                          remaining_item_count=remaining_item_count,
                          _embedded=te)

    return resp


@app.get('/{app_name}/trials',
         response_model=TrialsResponse,
         dependencies=[Depends(get_user_id)])
async def get_trials_op(app_name: str,
                        db: AsyncSession = Depends(get_db),
                        user_id: str = Depends(get_user_id),
                        cursor: int | None = None,
                        limit: int | None = None):
    limit_set = limit or 10
    if limit_set > 30:
        limit_set = 30
    if limit_set < 0:
        limit_set = 10
    trials, new_cursor, counts = await list_trials(db=db,
                                                   app_name=app_name,
                                                   user_id=user_id,
                                                   cursor=cursor,
                                                   limit=limit_set)

    total_item_count, remaining_item_count = counts

    uri = f'{app_name}/trials'
    self_link = _get_list_request_self_link(cursor, limit, uri)
    cursor_link = _get_list_request_next_link(new_cursor, limit_set, uri)

    links = TrialsLinks(self=self_link, next=cursor_link)

    te = TrialsEmbedded(trials=trials)

    resp = TrialsResponse(_links=links,
                          total_item_count=total_item_count,
                          remaining_item_count=remaining_item_count,
                          _embedded=te)

    return resp


def _get_pipeline_links(app_name, pipeline_name):
    self_href = urllib.parse.urljoin(f'https://{UNIP_DOMAIN}', f'{app_name}/pipelines/{pipeline_name}')
    self_link = Link(href=self_href)
    trials_href = f'{self_href}/trials'
    trials_link = Link(href=trials_href)
    version_href = f'{self_href}/version'
    version_link = Link(href=version_href)
    pl = PipelineLinks(self=self_link, trials=trials_link, version=version_link)
    return pl


@app.get('/{app_name}/pipelines',
         response_model=PipelinesResponse,
         dependencies=[Depends(get_user_id)])
async def get_pipelines_op(response: Response,
                           app_name: str,
                           cursor: str | None = None,
                           limit: int | None = None):
    limit_set = limit or 10
    pipelines, new_cursor, remaining_item_count, expired = await list_pipeline_resources(api_client=_k8s_api_client,
                                                                                         app_name=app_name,
                                                                                         cursor=cursor,
                                                                                         limit=limit_set)

    uri = f'{app_name}/pipelines'
    self_link = _get_list_request_self_link(cursor, limit, uri)
    cursor_link = _get_list_request_next_link(new_cursor, limit_set, uri)

    links = PipelinesLinks(self=self_link, next=cursor_link)

    embedded_pipelines = []

    for p in pipelines:
        pipeline_name = p['metadata']['name']
        pl = _get_pipeline_links(app_name, pipeline_name)
        ep = EmbeddedPipeline(name=pipeline_name, _links=pl)
        embedded_pipelines.append(ep)

    pe = PipelinesEmbedded(pipelines=embedded_pipelines)

    resp = PipelinesResponse(_links=links, remaining_item_count=remaining_item_count, _embedded=pe)
    if expired:
        response.status_code = 410
    return resp


@app.get('/{app_name}/pipelines/{pipeline_name}',
         response_model=PipelineResponse,
         dependencies=[Depends(get_user_id)])
async def get_pipeline_op(app_name: str,
                          pipeline_name: str):
    pipeline_res = await get_pipeline_resource(api_client=_k8s_api_client,
                                               app_name=app_name,
                                               pipeline_name=pipeline_name)
    pipeline_name = pipeline_res['metadata']['name']
    pipeline_links = _get_pipeline_links(app_name, pipeline_name)
    pipeline_definition = pipeline_res['spec']

    resp = PipelineResponse(name=pipeline_name, _links=pipeline_links, definition=pipeline_definition)

    return resp
16
controller/src/exp_pipeline/api/docker.md
Normal file
@@ -0,0 +1,16 @@
```
docker run --rm -p 8080:8080 \
    --name swagger-unip-pipeline \
    -e SWAGGER_FILE=/api/openapi.yaml \
    -e PORT=8080 \
    -v $PWD/controller/src/exp_pipeline/api:/api swaggerapi/swagger-editor
```

To use the OpenAPI 3.1 specification:

```
docker run --rm -p 8080:80 \
    --name swagger-unip-pipeline \
    -e SWAGGER_FILE=/api/openapi.yaml \
    -v $PWD/controller/src/exp_pipeline/api:/api swaggerapi/swagger-editor:next-v5
```
1756
controller/src/exp_pipeline/api/openapi-snapshot.json
Normal file
File diff suppressed because it is too large
1256
controller/src/exp_pipeline/api/openapi.yaml
Normal file
File diff suppressed because it is too large
52
controller/src/exp_pipeline/api/to-openapi-base.md
Normal file
@@ -0,0 +1,52 @@
# How to create the base OpenAPI specification

The base specification is used to generate the OpenAPI specifications for concrete pipelines.

**Remove "examples" sections recursively**

```jq
walk(if type == "object" and has("examples") then del(.examples) else . end)
```

**Remove all examples (the examples attributes)**

```shell
jq 'walk(if type == "object" and has("examples") then del(.examples) else . end)' openapi-base.json > openapi-base-fixed.json
```

**Remove everything related to X-Request-Id (all headers, while X-Request-Id is the only header)**

Remove the headers attributes (going through a temporary file, since redirecting back into the input file would truncate it before jq reads it):

```shell
jq 'walk(if type == "object" and has("headers") then del(.headers) else . end)' openapi-base-fixed.json > tmp.json && mv tmp.json openapi-base-fixed.json
```

Remove X-Request-Id from the parameters:

```shell
jq 'walk(if type=="object" and .name == "X-Request-Id" then empty else . end)' openapi-base-fixed.json > tmp.json && mv tmp.json openapi-base-fixed.json
```

**Remove non-public paths**

```shell
jq 'del(.paths."/{app}/pipelines/{pipeline}/trials/continue")' openapi-base-fixed.json > tmp.json && mv tmp.json openapi-base-fixed.json
jq 'del(.paths."/{app}/pipelines/{pipeline}/check")' openapi-base-fixed.json > tmp.json && mv tmp.json openapi-base-fixed.json
jq 'del(.paths."/{app}/pipelines/{pipeline}/trials/{tracking_id}/status/conditions")' openapi-base-fixed.json > tmp.json && mv tmp.json openapi-base-fixed.json
jq 'del(.components.schemas.CreateStatusConditionRequest)' openapi-base-fixed.json > tmp.json && mv tmp.json openapi-base-fixed.json
```

All the commands combined:

```shell
jq 'walk(if type == "object" and has("examples") then del(.examples) else . end) |
    walk(if type == "object" and has("headers") then del(.headers) else . end) |
    walk(if type == "object" and .name == "X-Request-Id" then empty else . end) |
    del(.paths."/{app}/pipelines/{pipeline}/trials/continue") |
    del(.paths."/{app}/pipelines/{pipeline}/check") |
    del(.paths."/{app}/pipelines/{pipeline}/trials/{tracking_id}/status/conditions") |
    del(.components.schemas.CreateStatusConditionRequest)
   ' openapi-base.json \
   > openapi-base-fixed.json
```
98
controller/src/exp_pipeline/box.py
Normal file
@@ -0,0 +1,98 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: ExperimentPipeline
# Authors: V. Polezhaev, A. Khritankov
# Created: 2024
# ============================================================
import logging
import posixpath
from uuid import UUID

from fastapi import HTTPException
from kubernetes_asyncio.client import CustomObjectsApi, ApiException, ApiClient

from exp_pipeline.schema import ConnectedBox

logger = logging.getLogger(__name__)


async def _get_s3_box_resource(api_client: ApiClient, s3_box_name, namespace):
    co_api = CustomObjectsApi(api_client)
    try:
        # get_namespaced_custom_object returns a plain object rather than a coroutine,
        # so the *_with_http_info variant is used and awaited instead
        s3_box_res, _, _ = await co_api.get_namespaced_custom_object_with_http_info(
            "unified-platform.cs.hse.ru", "v1", namespace,
            "databoxes", s3_box_name)
    except ApiException as exc:
        if exc.status == 404:
            logger.error(f'Box {s3_box_name} not found in namespace {namespace}')
            raise HTTPException(status_code=503, detail="Service unavailable")
        raise exc

    if 's3Storage' not in s3_box_res['status']:
        logger.error(f'Box {s3_box_name} in namespace {namespace} does not have an s3Storage status')
        raise HTTPException(status_code=500, detail='Internal server error')

    return s3_box_res


async def _get_s3_box_attrs(api_client: ApiClient, s3_box_name, namespace):
    s3_box_res = await _get_s3_box_resource(api_client, s3_box_name, namespace)

    s3_box_uid = UUID(s3_box_res['metadata']['uid'])

    s3_storage_status = s3_box_res['status']['s3Storage']
    if ('csiS3PersistentVolumeClaimName' not in s3_storage_status) \
            or (not s3_storage_status['csiS3PersistentVolumeClaimName']):
        logger.error(f'Box {s3_box_name} in namespace {namespace} '
                     f'csiS3PersistentVolumeClaimName not set')
        raise HTTPException(status_code=500, detail='Internal server error')

    return s3_box_uid, s3_storage_status['csiS3PersistentVolumeClaimName']


async def construct_connected_box(api_client: ApiClient, connected_box_section: dict,
                                  namespace: str) -> ConnectedBox:
    connected_box_name = connected_box_section['name']
    if 'mountS3Box' in connected_box_section:
        mount_s3_box = connected_box_section['mountS3Box']
        box_name = mount_s3_box['s3BoxName']
        s3_box_name, dataset_ref_box_name = box_name, None
    elif 'mountDataset' in connected_box_section:
        mount_dataset = connected_box_section['mountDataset']
        box_name = mount_dataset['datasetReferenceName']
        s3_box_name, dataset_ref_box_name = None, box_name
    else:
        logger.error(f'Connected box {connected_box_name} must specify either mountS3Box or mountDataset')
        raise HTTPException(status_code=503, detail='Service unavailable')
    mode = 'mount'
    _, pvc_name = await _get_s3_box_attrs(api_client, box_name, namespace)
    cb = ConnectedBox(name=connected_box_name,
                      mode=mode,
                      s3_box_name=s3_box_name,
                      dataset_ref_box_name=dataset_ref_box_name,
                      pvc_name=pvc_name)
    if 'path' in connected_box_section:
        cb.mount_path = connected_box_section['path']
    if 'default' in connected_box_section and 'mountS3Box' in connected_box_section:
        cb.default = bool(connected_box_section['default'])
    return cb


def get_default_box(user_id: str,
                    pipeline_name: str,
                    connected_boxes: list[ConnectedBox]
                    ) -> ConnectedBox:
    if not connected_boxes:
        logger.error(f'No connected boxes specified for pipeline {pipeline_name}, but at least one required')
        raise HTTPException(status_code=503, detail="Service unavailable")

    for cb in connected_boxes:
        if cb.default:
            return cb
    # todo: probably should also check that this is not a mountDataset box
    return connected_boxes[0]


def get_path_inside_box(user_id, location):
    return posixpath.join('users', user_id, 'file_groups', location)
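A quick illustration of the box-relative path layout produced by get_path_inside_box (the user id and location are made up):

```python
import posixpath

# made-up values, for illustration only
assert posixpath.join('users', 'user-123', 'file_groups', 'run-1/outputs') \
       == 'users/user-123/file_groups/run-1/outputs'
```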
21
controller/src/exp_pipeline/client_config.py
Normal file
@@ -0,0 +1,21 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: ExperimentPipeline
# Authors: V. Polezhaev, A. Khritankov
# Created: 2024
# ============================================================
import os

APP_NAME_VAR_NAME = 'UNIP_PIPELINE_APP_NAME'
UNIP_PIPELINE_APP_NAME = os.getenv(APP_NAME_VAR_NAME)

PIPELINE_VAR_NAME = 'UNIP_PIPELINE_PIPELINE_NAME'
UNIP_PIPELINE_PIPELINE_NAME = os.getenv(PIPELINE_VAR_NAME)

API_VAR_NAME = 'UNIP_PIPELINE_API'
UNIP_PIPELINE_API = os.getenv(API_VAR_NAME)

TRACKING_ID_VAR_NAME = 'UNIP_TRACKING_ID'
UNIP_TRACKING_ID = os.getenv(TRACKING_ID_VAR_NAME)

INTERNAL_API_USER_ID = 'platform-user'
19
controller/src/exp_pipeline/config.py
Normal file
@@ -0,0 +1,19 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: ExperimentPipeline
# Authors: V. Polezhaev, A. Khritankov
# Created: 2024
# ============================================================
import os

UNIP_PIPELINE_RUN_MODE = os.getenv('UNIP_PIPELINE_RUN_MODE')

UNIP_FILES_API = os.getenv('UNIP_FILES_API')

UNIP_PIPELINES_API = os.getenv('UNIP_PIPELINES_API')

UNIP_DATETIME_FORMAT = os.getenv('UNIP_DATETIME_FORMAT', '%Y-%m-%dT%H:%M:%S.%f%z')

UNIP_PIPELINE_VALIDATION_IMAGE = os.getenv('UNIP_PIPELINE_VALIDATION_IMAGE')
57
controller/src/exp_pipeline/handlers.py
Normal file
@@ -0,0 +1,57 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: ExperimentPipeline
# Authors: V. Polezhaev, A. Khritankov
# Created: 2024
# ============================================================

import kopf

from bound import not_dev_namespace
from exp_pipeline.pipeline import set_trial_failed, \
    set_trial_completed
from exp_pipeline.storage.db import AsyncDbSession
from kopf_k8s_client import async_api_client

PRIVATE_API_BA_SECRET_NAME_TEMPLATE = 'pipeline-{0}-api-ba-cred'


def _job_kind_event_filter(body, **_): return body['involvedObject']['kind'] == 'Job'


def _completed_reason_event_filter(body, **_): return body['reason'] == 'Completed'


def _backoff_limit_exceeded_reason_event_filter(body, **_): return body['reason'] == 'BackoffLimitExceeded'


# @kopf.on.create('v1', 'events', field='involvedObject.kind', value='Job', retries=3)
@kopf.on.create('v1',
                'events',
                when=kopf.all_([_job_kind_event_filter,
                                _completed_reason_event_filter,
                                not_dev_namespace]),
                retries=3)
async def set_trial_completed_on_job_event(body, **_):
    job_ref = body['involvedObject']
    job_name = job_ref['name']
    job_namespace = job_ref['namespace']

    async with AsyncDbSession() as db:
        await set_trial_completed(db, async_api_client, job_name, job_namespace)


@kopf.on.create('v1',
                'events',
                when=kopf.all_([_job_kind_event_filter,
                                _backoff_limit_exceeded_reason_event_filter,
                                not_dev_namespace]),
                retries=3)
async def set_trial_failed_on_job_event(body, **_):
    job_ref = body['involvedObject']
    job_name = job_ref['name']
    job_namespace = job_ref['namespace']

    async with AsyncDbSession() as db:
        await set_trial_failed(db, async_api_client, job_name, job_namespace)
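The filters match core v1 Event objects of the shape Kubernetes emits when a Job finishes; a trimmed, illustrative body that would trigger set_trial_completed_on_job_event:

```python
# trimmed core/v1 Event, illustrative values only
event_body = {
    "reason": "Completed",
    "involvedObject": {
        "kind": "Job",
        "name": "trial-job-abc123",
        "namespace": "user-apps",
    },
}

assert _job_kind_event_filter(event_body)
assert _completed_reason_event_filter(event_body)
```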
20
controller/src/exp_pipeline/jinja.py
Normal file
@@ -0,0 +1,20 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: ExperimentPipeline
# Authors: V. Polezhaev, A. Khritankov
# Created: 2024
# ============================================================
from jinja2 import Environment, FileSystemLoader

jinja_env = Environment(
    loader=FileSystemLoader('templates/experiment-pipeline'),
    lstrip_blocks=True,
    trim_blocks=True
)


basic_jinja_env = Environment(
    loader=FileSystemLoader('templates/basic-resources'),
    lstrip_blocks=True,
    trim_blocks=True
)
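Typical usage of these environments looks like the sketch below; the template name and variables are hypothetical:

```python
from exp_pipeline.jinja import jinja_env

# 'trial-job.yaml.j2' is a hypothetical template under templates/experiment-pipeline
manifest_yaml = jinja_env.get_template('trial-job.yaml.j2').render(
    job_name='trial-job-abc123',
    namespace='user-apps',
)
```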
32
controller/src/exp_pipeline/logs.py
Normal file
@@ -0,0 +1,32 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: ExperimentPipeline
# Authors: V. Polezhaev, A. Khritankov
# Created: 2024
# ============================================================
from logging.config import dictConfig


def configure_logging():
    dictConfig({
        'version': 1,
        'disable_existing_loggers': False,
        'formatters': {
            'default': {
                'format': '[%(asctime)s] [%(levelname)s] [%(name)s]: %(message)s',
            }
        },
        'handlers': {
            'default': {
                'class': 'logging.StreamHandler',
                'stream': 'ext://sys.stdout',
                'formatter': 'default'
            }
        },
        'loggers': {
            'root': {
                'level': 'INFO',
                'handlers': ['default']
            }
        }
    })
1097
controller/src/exp_pipeline/openapi-base.json
Normal file
File diff suppressed because it is too large
542
controller/src/exp_pipeline/openapi.py
Normal file
@@ -0,0 +1,542 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: ExperimentPipeline
# Authors: V. Polezhaev, A. Khritankov
# Created: 2024
# ============================================================
import json
import logging
import os
from copy import copy, deepcopy
from typing import Literal, Optional, List, Iterable, get_args

from pydantic import BaseModel

from exp_pipeline.schema import DATATYPES, DATATYPE_TO_CONTENT_TYPE, OUTPUT_DATATYPES

_base_spec = None
logger = logging.getLogger(__name__)


class ApiVar(BaseModel):
    kind: Literal["input", "output"]
    name: str
    description: Optional[str] = None
    datatypes: List[DATATYPES]
    content_types: Optional[List[str]] = None
    required: Optional[bool] = None


def _extend_content_types_given_datatypes(content_types, datatypes):
    datatypes = set(datatypes) - {'FILE'}
    content_types_to_add = {v for k, v in DATATYPE_TO_CONTENT_TYPE.items() if k in datatypes}
    content_types_to_add = {ct for ct in content_types_to_add if ct not in content_types}
    content_types += content_types_to_add  # list.__iadd__ extends the caller's list in place
    return content_types


def _create_var_datatypes_content_types(section, supported_datatypes):
    if 'type' in section:
        type_section = section['type']
        if 'datatypes' in type_section:
            datatypes = type_section['datatypes']
            datatypes = [d for d in datatypes if d in supported_datatypes]
            if 'FILE' not in datatypes:
                datatypes.append('FILE')
        else:
            datatypes = copy(supported_datatypes)
        if 'contentTypes' in type_section:
            content_types = type_section['contentTypes']
            _extend_content_types_given_datatypes(content_types, datatypes)
        else:
            content_types = None
    else:
        datatypes = copy(supported_datatypes)
        content_types = None
    return datatypes, content_types


def _create_input_var(input_section: dict, supported_datatypes) -> ApiVar:
    var_attrs = {
        'kind': 'input',
        'name': input_section['name'],
        'description': input_section.get('description') or None,
    }
    required = True
    if 'required' in input_section:
        required = input_section['required'] == 'true' or \
                   input_section['required'] is True

    datatypes, content_types = _create_var_datatypes_content_types(input_section, supported_datatypes)

    var_attrs.update({
        'content_types': content_types,
        'datatypes': datatypes,
        'required': required
    })
    api_var = ApiVar(**var_attrs)
    return api_var


def _create_output_var(output_section: dict, supported_datatypes) -> ApiVar:
    var_attrs = {
        'kind': 'output',
        'name': output_section['name'],
        'description': output_section.get('description') or None,
    }

    datatypes, content_types = _create_var_datatypes_content_types(output_section, supported_datatypes)

    var_attrs.update({
        'content_types': content_types,
        'datatypes': datatypes
    })
    api_var = ApiVar(**var_attrs)
    return api_var


def _check_box_path(input_section):
    if 'mountFrom' in input_section:
        mount_from = input_section['mountFrom']
        box = mount_from['box']
        return 'boxPath' in box
    return False


def _get_name(api_var: ApiVar):
    var_char = 'input' if api_var.kind == 'input' else 'output'
    return {
        'const': api_var.name,
        'description': f'Name of the {var_char} variable.'
    }


def _get_var_base(api_var: ApiVar):
    openapi_spec = {
        'type': 'object',
        'properties': {
            'name': _get_name(api_var)
        },
        'required': [
            'name'
        ]
    }
    if api_var.description:
        openapi_spec['description'] = api_var.description

    return openapi_spec


def _get_input_var_datatype(api_var: ApiVar):
    return {
        'type': 'string',
        'description': 'Type of the input variable value.',
        'enum': api_var.datatypes
    }


def _get_output_var_datatype(api_var: ApiVar):
    return {
        'type': 'string',
        'description': 'Type of the output variable value.',
        'enum': api_var.datatypes
    }


def _get_start_input_var_data(api_var: ApiVar):
    description = """Value of the input variable.
    If datatype is one of {'FILE', 'WEBSITE'}, it contains a location in the Files service
    from which the input variable value can be retrieved.
    It may point to a single file or to a file group.
    If datatype is not one of {'FILE', 'WEBSITE'}, it contains a string with the
    serialized value of the input variable, which will be saved to a file.
    For every datatype != 'FILE', the file
    the variable value is saved to
    has a predefined content type (content_type)."""
    return {
        # 'type': any, if type not specified
        'description': description
    }


def _get_start_output_var_data(api_var: ApiVar):
    description = """Location of the output variable in the Files service.
    Specifies where the values of the output variable
    must be saved. Must correspond to a file group.
    It may be omitted, in which case a file group with a
    generated name is created."""
    return {
        'type': 'string',
        'description': description
    }


def _get_result_input_var_data(api_var: ApiVar):
    description = \
        """Value of the input variable.
        A location in the Files service from which the input variable value can be retrieved.
        It may point to a single file or to a file group.
        If, at pipeline start, the input value was
        passed with a datatype not in {'FILE', 'WEBSITE'}, the attribute
        value is the location of the created file
        the passed value was written to."""
    return {
        'type': 'string',
        'description': description
    }


def _get_result_output_var_data(api_var: ApiVar):
    description = """Name of the associated location in the Files service.
    The location corresponds to a file group.
    If the trial has not finished yet,
    the file group will not contain
    results yet."""
    return {
        'type': 'string',
        'description': description
    }


def _get_result_var_data_box(api_var: ApiVar):
    description = 'Name of the associated DataBox'
    return {
        'type': 'string',
        'description': description
    }


def _get_start_var_shape(api_var: ApiVar):
    var_char = 'input' if api_var.kind == 'input' else 'output'
    description = f'Shape of the {var_char} variable value.\n' \
                  'Applicable to any input variable datatype.\n'
    return {
        'description': description,
        'oneOf': [
            {
                'type': 'array',
                'items': {
                    'type': 'integer'
                },
                'minItems': 1,
                'maxItems': 32
            },
            {
                'type': 'integer'
            }
        ]
    }


def _get_start_input_var_content_type(api_var: ApiVar):
    description = """Content type of the file or files when the input variable
    datatype == "FILE". The file content type must be set
    when the pipeline is started.
    When the input variable datatype != "FILE", the passed file content
    type is ignored. Every datatype has a predefined content type."""
    spec = {
        'type': 'string',
        'description': description
    }
    if api_var.content_types:
        spec['enum'] = api_var.content_types
    return spec


def _get_result_input_var_content_type(api_var: ApiVar):
    description = """Content type of the file or files of the input variable.
    If, at pipeline start, the input value was
    passed with datatype != 'FILE', the content type of the file
    the passed value was saved to matches
    the predefined type for that datatype."""
    spec = {
        'type': 'string',
        'description': description
    }
    if api_var.content_types:
        spec['enum'] = api_var.content_types
    return spec


def _get_result_output_var_content_type(api_var: ApiVar):
    description = """Content type of the file or files of the output variable.
    The content type is defined by the content_type attributes
    of the output_vars list objects passed at pipeline start."""
    spec = {
        'type': 'string',
        'description': description
    }
    if api_var.content_types:
        spec['enum'] = api_var.content_types
    return spec


def _get_start_output_var_content_type(api_var: ApiVar):
    spec = {
        'type': 'string',
        'description': 'Expected content type of the file or files '
                       'of the output variable.'
    }
    if api_var.content_types:
        spec['enum'] = api_var.content_types
    return spec


def _construct_start_input_var(api_var: ApiVar):
    openapi_spec = _get_var_base(api_var)
    openapi_spec['properties']['datatype'] = _get_input_var_datatype(api_var)
    openapi_spec['required'].append('datatype')
    openapi_spec['properties']['data'] = _get_start_input_var_data(api_var)
    openapi_spec['required'].append('data')
    shape = _get_start_var_shape(api_var)
    openapi_spec['properties']['shape'] = shape
    openapi_spec['required'].append('shape')
    content_type = _get_start_input_var_content_type(api_var)
    openapi_spec['properties']['content_type'] = content_type
    return openapi_spec
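For a hypothetical required input variable named x that accepts only 'FILE', _construct_start_input_var would produce roughly this fragment (descriptions elided):

```python
# illustrative output for ApiVar(kind='input', name='x', datatypes=['FILE'], required=True)
start_input_var_spec = {
    'type': 'object',
    'properties': {
        'name': {'const': 'x', 'description': 'Name of the input variable.'},
        'datatype': {'type': 'string', 'description': 'Type of the input variable value.',
                     'enum': ['FILE']},
        'data': {'description': '...'},  # free-form: no 'type' means any JSON type
        'shape': {'description': '...',
                  'oneOf': [{'type': 'array', 'items': {'type': 'integer'},
                             'minItems': 1, 'maxItems': 32},
                            {'type': 'integer'}]},
        'content_type': {'type': 'string', 'description': '...'},
    },
    'required': ['name', 'datatype', 'data', 'shape'],
}
```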
def _construct_start_output_var(api_var: ApiVar):
|
||||
openapi_spec = _get_var_base(api_var)
|
||||
openapi_spec['properties']['datatype'] = _get_output_var_datatype(api_var)
|
||||
openapi_spec['properties']['data'] = _get_start_output_var_data(api_var)
|
||||
openapi_spec['properties']['content_type'] = _get_start_output_var_content_type(api_var)
|
||||
return openapi_spec
|
||||
|
||||
|
||||
def _construct_result_input_var(api_var: ApiVar):
|
||||
openapi_spec = _get_var_base(api_var)
|
||||
openapi_spec['properties']['datatype'] = _get_input_var_datatype(api_var)
|
||||
openapi_spec['required'].append('datatype')
|
||||
openapi_spec['properties']['data'] = _get_result_input_var_data(api_var)
|
||||
openapi_spec['required'].append('data')
|
||||
openapi_spec['properties']['data_box'] = _get_result_var_data_box(api_var)
|
||||
openapi_spec['required'].append('data_box')
|
||||
openapi_spec['properties']['content_type'] = _get_result_input_var_content_type(api_var)
|
||||
openapi_spec['required'].append('content_type')
|
||||
return openapi_spec
|
||||
|
||||
|
||||
def _construct_result_output_var(api_var: ApiVar):
|
||||
openapi_spec = _get_var_base(api_var)
|
||||
openapi_spec['properties']['datatype'] = _get_output_var_datatype(api_var)
|
||||
openapi_spec['properties']['data'] = _get_result_output_var_data(api_var)
|
||||
openapi_spec['required'].append('data')
|
||||
openapi_spec['properties']['data_box'] = _get_result_var_data_box(api_var)
|
||||
openapi_spec['required'].append('data_box')
|
||||
openapi_spec['properties']['content_type'] = _get_result_output_var_content_type(api_var)
|
||||
return openapi_spec
|
||||
|
||||
|
||||
def _create_api_vars_from_k8s_resource(resource: dict, continue_with: Iterable[dict] = None):
    api_vars = []
    input_vars_names = set()
    output_vars_names = set()

    _append_api_vars_from_k8s_resource(api_vars, input_vars_names, output_vars_names, resource)

    if continue_with:
        for res in continue_with:
            _append_api_vars_from_k8s_resource(api_vars, input_vars_names, output_vars_names, res)

    return api_vars


def _append_api_vars_from_k8s_resource(api_vars, input_vars_names, output_vars_names, resource: dict):
    supported_input_datatypes = get_args(DATATYPES)
    supported_output_datatypes = get_args(OUTPUT_DATATYPES)
    if 'inputs' in resource:
        for input_section in resource['inputs']:
            var_name = input_section['name']
            if var_name in input_vars_names:
                continue
            if _check_box_path(input_section):
                continue
            api_var = _create_input_var(input_section, supported_input_datatypes)
            api_vars.append(api_var)
            input_vars_names.add(var_name)
    if 'outputs' in resource:
        for output_section in resource['outputs']:
            var_name = output_section['name']
            if var_name in output_vars_names:
                continue
            api_var = _create_output_var(output_section, supported_output_datatypes)
            api_vars.append(api_var)
            output_vars_names.add(var_name)

def _construct_result_inputs_outputs(spec_vars: list[ApiVar]):
    input_vars = [spec_var for spec_var in spec_vars
                  if spec_var.kind == 'input']
    result_inputs_spec = None
    if input_vars:
        result_inputs_spec = {
            'description': """Values of the input variables. The list contains only those
            input variables whose values are passed and were actually passed
            when the pipeline was started.
            Input variables associated in the pipeline specification with
            data in boxes are not included in the list.""",
            'type': 'array',
            'items': {'oneOf': []}
        }
        required_input_vars = [spec_var for spec_var in input_vars
                               if spec_var.required]
        required_input_vars_names = [spec_var.name for spec_var in required_input_vars]
        required_input_vars_str = f'\n\nRequired variables: {", ".join(required_input_vars_names)}'
        result_inputs_spec['description'] += required_input_vars_str
        optional_input_vars = [spec_var for spec_var in input_vars
                               if not spec_var.required]
        inputs = [_construct_result_input_var(v) for v in required_input_vars]
        inputs_min_items = len(inputs)
        inputs += [_construct_result_input_var(v) for v in optional_input_vars]
        result_inputs_spec['items']['oneOf'] = inputs
        result_inputs_spec['minItems'] = inputs_min_items

    output_vars = [spec_var for spec_var in spec_vars
                   if spec_var.kind == 'output']
    result_outputs_spec = None
    if output_vars:
        result_outputs_spec = {
            'description': 'Values of the output variables. '
                           'The list contains only those output variables '
                           'whose parameters were passed when the pipeline was started. ',
            'type': 'array',
            'items': {'oneOf': []}
        }
        outputs = [_construct_result_output_var(v) for v in output_vars]
        result_outputs_spec['items']['oneOf'] = outputs

    return result_inputs_spec, result_outputs_spec

def _construct_start_inputs_outputs(spec_vars: list[ApiVar]):
    required_input_vars = [spec_var for spec_var in spec_vars
                           if spec_var.required and spec_var.kind == 'input']
    optional_input_vars = [spec_var for spec_var in spec_vars
                           if not spec_var.required and spec_var.kind == 'input']
    output_vars = [spec_var for spec_var in spec_vars
                   if spec_var.kind == 'output']
    start_inputs = [_construct_start_input_var(v) for v in required_input_vars]
    start_inputs_min_items = len(start_inputs)
    start_inputs += [_construct_start_input_var(v) for v in optional_input_vars]
    start_outputs = [_construct_start_output_var(v) for v in output_vars]
    start_outputs_max_items = len(start_outputs)

    start_outputs_spec = {
        'description': 'List of parameters for the output variables.\n\n'
                       'If the parameters of an output variable are not set, they are not passed to the pipeline, '
                       'the pipeline does not compute values for that output variable, '
                       'and the variable is not saved in the pipeline trial.\n\n'
                       'If the pipeline specification includes a next pipeline, '
                       'the list includes all output variables of all '
                       'subsequent pipelines.',
        'type': 'array',
        'items': {'oneOf': start_outputs},
        'maxItems': start_outputs_max_items
    }

    start_inputs_spec = {
        'description': 'List of values of the input variables.\n\n'
                       'If the value of an input variable is not set, it is not passed to the pipeline, '
                       'and the variable is not saved in the pipeline trial. '
                       'If the value of a required variable is not set, the pipeline is not started.\n\n'
                       'If the pipeline specification includes a next pipeline, '
                       'the list includes all input variables of all '
                       'subsequent pipelines.',
        'type': 'array',
        'items': {'oneOf': start_inputs},
        'minItems': start_inputs_min_items
    }

    required_input_vars_names = [spec_var.name for spec_var in required_input_vars]
    required_input_vars_str = f'\n\nRequired variables: {", ".join(required_input_vars_names)}'
    start_inputs_spec['description'] += required_input_vars_str

    return start_inputs_spec, start_outputs_spec

def _fill_api_spec_sections(base_spec: dict,
                            api_vars: list[ApiVar]):
    result_inputs, result_outputs = _construct_result_inputs_outputs(api_vars)

    schemas = base_spec['components']['schemas']
    trial_response = schemas['TrialResponse']
    trial_response_properties = trial_response['properties']
    if not result_inputs and not result_outputs:
        # drop 'vars' entirely; the nested deletes below must not run
        # after it has been removed
        del trial_response_properties['vars']
    else:
        if result_inputs:
            trial_response_properties['vars']['properties']['inputs'] = result_inputs
        else:
            del trial_response_properties['vars']['properties']['inputs']
        if result_outputs:
            trial_response_properties['vars']['properties']['outputs'] = result_outputs
        else:
            del trial_response_properties['vars']['properties']['outputs']

    start_inputs, start_outputs = _construct_start_inputs_outputs(api_vars)
    start_trial = schemas['CreateTrialRequest']
    start_trial_properties = start_trial['properties']
    if start_inputs:
        start_trial_properties['inputs'] = start_inputs
    else:
        del start_trial_properties['inputs']
    if start_outputs:
        start_trial_properties['output_vars'] = start_outputs
    else:
        del start_trial_properties['output_vars']

    return base_spec

def _get_base_spec():
    global _base_spec
    if not _base_spec:
        try:
            base_dir = os.path.dirname(os.path.realpath(__file__))
            json_path = os.path.join(base_dir, 'openapi-base.json')
            with open(json_path) as f:
                _base_spec = json.load(f)
        except Exception as exc:
            logger.exception('Cannot load base OpenAPI spec', exc_info=exc)
            raise
    return _base_spec

def create_openapi_spec(api_resource: dict, continue_with=None) -> dict:
    base_spec = _get_base_spec()

    spec = api_resource['spec']
    api_spec = spec.get('apiSpec')

    cw_specs = filter(lambda x: x is not None, [cw['spec'].get('apiSpec') for cw in continue_with]) \
        if continue_with \
        else None
    api_vars = []
    if api_spec:
        api_vars = _create_api_vars_from_k8s_resource(api_spec,
                                                      cw_specs)

    api_spec = _fill_api_spec_sections(deepcopy(base_spec), api_vars)

    return api_spec

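Editor's note: a minimal usage sketch (illustrative only; the resource layout is assumed from the spec/apiSpec accesses above, and the variable sections must carry whatever fields _create_input_var and _create_output_var expect):

# Hypothetical pipeline custom resource; only the fields read by
# create_openapi_spec are shown.
pipeline_resource = {
    'spec': {
        'apiSpec': {
            'inputs': [{'name': 'dataset'}],
            'outputs': [{'name': 'report'}],
        }
    }
}

openapi = create_openapi_spec(pipeline_resource)
# openapi is a deep copy of openapi-base.json with the CreateTrialRequest
# and TrialResponse schemas specialized for the declared variables.
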
if __name__ == '__main__':

    def _load_example_pipeline_spec(path):
        import yaml
        with open(path) as f:
            res = yaml.safe_load(f)
        return res


    import sys

    if len(sys.argv) != 3:
        # SystemExit is the idiomatic way to abort a CLI run
        # (SyntaxError is reserved for parser errors)
        raise SystemExit('input spec path and output path must be provided')
    pipeline_spec = _load_example_pipeline_spec(sys.argv[1])

    spec = create_openapi_spec(pipeline_spec)

    with open(sys.argv[2], 'w', encoding='utf-8') as f:
        json.dump(spec, f, ensure_ascii=False, indent=2)

1247  controller/src/exp_pipeline/pipeline.py  (new file; diff suppressed because it is too large)
0  controller/src/exp_pipeline/results/__init__.py  (new file)
17  controller/src/exp_pipeline/results/main.py  (new file)
@@ -0,0 +1,17 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: ExperimentPipeline
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
from exp_pipeline.logs import configure_logging
from exp_pipeline.results.validate_trial_results import validate_trial_results, \
    start_next_pipeline, add_status_condition, get_internal_api_credentials

if __name__ == '__main__':
    configure_logging()
    user_id, user_password = get_internal_api_credentials()
    status_condition, validated, next_pipeline_name = validate_trial_results()
    add_status_condition(status_condition, user_id, user_password)
    if validated and next_pipeline_name:
        start_next_pipeline(next_pipeline_name, user_id, user_password)

170  controller/src/exp_pipeline/results/validate_trial_results.py  (new file)
@@ -0,0 +1,170 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: ExperimentPipeline
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
import json
import logging
import os
import urllib.parse

import requests

from exp_pipeline.client_config import UNIP_PIPELINE_API, UNIP_TRACKING_ID, \
    UNIP_PIPELINE_APP_NAME, UNIP_PIPELINE_PIPELINE_NAME
from exp_pipeline.schema import CreateStatusConditionRequest


START_REQUEST_DATA_FILE_NAME = 'request.json'
VALIDATION_VARS_DATA_FILE_NAME = 'validation-vars.json'

logger = logging.getLogger(__name__)


def get_output_var_validation_mount_path(stage_name: str, output_var_name: str):
    return f'/stages/{stage_name}/outputs/{output_var_name}'


def get_start_request_data_mount_path():
    return '/start_request'


def get_validation_vars_data_mount_path():
    return '/validation_vars'


def get_internal_api_secret_mount_path():
    return '/internal_api_secret'


def _validate_trial_outputs(trial_outputs):
    outputs_are_validated = True
    index = 0
    while index < len(trial_outputs) and outputs_are_validated:
        stage = trial_outputs[index]
        index += 1
        stage_name = stage["stage_name"]
        logger.info(f'stage {stage_name} outputs:')
        logger.info(stage["output_vars"])
        for output_var in stage["output_vars"]:
            mount_path = get_output_var_validation_mount_path(stage_name, output_var)
            results = os.listdir(mount_path)
            logger.info(f'stage {stage_name} output name {output_var} files:')
            logger.info(results)
            if not results:
                outputs_are_validated = False
                break
    return outputs_are_validated

def _get_status_condition_type(results_are_validated):
    type_ = "OutputsAreValidated" if results_are_validated else "OutputsErrors"
    return type_


def start_next_pipeline(next_pipeline_name, user_id, user_password):
    app_name = UNIP_PIPELINE_APP_NAME
    pipeline_name = UNIP_PIPELINE_PIPELINE_NAME
    next_trial_uri = f'/{app_name}/pipelines/{pipeline_name}/trials/continue'
    post_url = urllib.parse.urljoin(UNIP_PIPELINE_API, next_trial_uri)

    logger.info(f'Starting next pipeline {next_pipeline_name}...')

    rd_dir = get_start_request_data_mount_path()
    logger.info('Listing request data dir:')
    logger.info('\n'.join(os.listdir(rd_dir)))
    request_data_path = os.path.join(rd_dir, START_REQUEST_DATA_FILE_NAME)
    with open(request_data_path) as f:
        request_data = f.read().strip()

    params = {'tracking_id': UNIP_TRACKING_ID, 'next_pipeline_name': next_pipeline_name}

    logger.info(f'Requesting URL {post_url}')
    response = requests.post(post_url, params=params, data=request_data, auth=(user_id, user_password))

    if response.status_code != 201 and response.status_code != 200:
        logger.error(f'Pipelines API error (start new trial); POST {post_url}; '
                     f'Returned code: {response.status_code}')
        logger.error(response.content)
        return

    logger.info('Response:')
    logger.info(response.status_code)
    logger.info(response.content)

def _read_validation_vars():
    vv_dir = get_validation_vars_data_mount_path()
    logger.info('Listing validation vars dir:')
    logger.info('\n'.join(os.listdir(vv_dir)))
    vv_data_path = os.path.join(vv_dir, VALIDATION_VARS_DATA_FILE_NAME)
    logger.info(f'Reading {vv_data_path}...')
    with open(vv_data_path) as f:
        vv = json.load(f)
    next_pipeline_name = vv.get('next_pipeline')
    logger.info(f'Validation vars file content:\n{vv}')
    return vv['trial_outputs'], next_pipeline_name

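Editor's note: a sketch of a validation-vars.json payload as _read_validation_vars expects it. Field names follow the ValidationVars and TrialStageOutput models in schema.py; the concrete stage and variable names are made up.

# Hypothetical file contents, shown as the parsed Python value:
example_validation_vars = {
    "trial_outputs": [
        {"stage_name": "train", "output_vars": ["model", "metrics"]},
        {"stage_name": "report", "output_vars": ["report"]},
    ],
    "next_pipeline": "evaluate",  # may be absent/None when there is no next pipeline
}
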
def validate_trial_results() -> tuple[str, bool, str]:
    trial_outputs, next_pipeline_name = _read_validation_vars()

    outputs_are_validated = _validate_trial_outputs(trial_outputs)

    sct = _get_status_condition_type(outputs_are_validated)

    return sct, outputs_are_validated, next_pipeline_name

def _get_basic_auth_credentials(credentials_lines: str) -> tuple[str, str]:
    records = credentials_lines.split('\n')
    first_record = records[0]
    sep_index = first_record.find(':')
    user_id, user_password = first_record[:sep_index], first_record[sep_index + 1:]
    return user_id, user_password

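Editor's note: the parser above reads only the first line of the mounted secret and splits on the first colon, so the user name must not contain ':' while the password may. A quick illustration:

# Hypothetical contents of the mounted 'credentials' file:
#     pipeline-bot:s3cr3t:with:colons
assert _get_basic_auth_credentials('pipeline-bot:s3cr3t:with:colons\n') \
    == ('pipeline-bot', 's3cr3t:with:colons')
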
def _read_internal_api_credentials(path):
    try:
        with open(path) as f:
            creds_lines = f.read()
        user_id, user_password = _get_basic_auth_credentials(creds_lines)
        return user_id, user_password
    except IOError as exc:
        logger.error(f'Error reading internal API credentials file {path}', exc_info=exc)
        return None
    except Exception as exc:
        logger.error(f'Error parsing internal API credentials file {path}', exc_info=exc)
        return None


def get_internal_api_credentials():
    cred_dir = get_internal_api_secret_mount_path()
    cred_path = os.path.join(cred_dir, 'credentials')
    if not os.path.exists(cred_path):
        logger.error(f'Internal API credentials path {cred_path} does not exist')
        return None
    if not os.path.isfile(cred_path):
        logger.error(f'Internal API credentials path {cred_path} is not a file')
        return None
    credentials = _read_internal_api_credentials(cred_path)
    if credentials is None:
        # _read_internal_api_credentials already logged the cause;
        # do not unpack None into a tuple here
        return None
    user_id, user_password = credentials
    return user_id, user_password

def add_status_condition(type_: str, user_id, user_password):
    app_name = UNIP_PIPELINE_APP_NAME
    pipeline_name = UNIP_PIPELINE_PIPELINE_NAME
    tracking_id = UNIP_TRACKING_ID
    conditions_uri = f'/{app_name}/pipelines/{pipeline_name}/trials/{tracking_id}/status/conditions'
    post_url = urllib.parse.urljoin(UNIP_PIPELINE_API, conditions_uri)
    request = CreateStatusConditionRequest(type=type_)
    payload = request.model_dump_json()
    logger.info('Adding status condition to trial:')
    logger.info(conditions_uri)
    logger.info(payload)
    resp = requests.post(post_url, data=payload, auth=(user_id, user_password))
    logger.info('Response:')
    logger.info(resp.status_code)
    logger.info(resp.content)

298  controller/src/exp_pipeline/schema.py  (new file)
@@ -0,0 +1,298 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: ExperimentPipeline
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
from typing import Literal

from pydantic import BaseModel, ConfigDict, Field, AwareDatetime

DATATYPES = Literal["FILE", "FP32", "FP64", "INT32", "dict", "str", "WEBSITE"]
OUTPUT_DATATYPES = Literal["FILE", "WEBSITE"]
DATATYPE_TO_CONTENT_TYPE = {'FP32': 'application/json',
                            'FP64': 'application/json',
                            'INT32': 'application/json',
                            'dict': 'application/json',
                            'str': 'application/json',
                            'WEBSITE': 'text/html'}


class Link(BaseModel):
    href: str
    templated: bool | None = None


class ConnectedBox(BaseModel):
    name: str
    mode: str
    dataset_ref_box_name: str | None = None
    s3_box_name: str | None = None
    pvc_name: str
    # the path where the box is mounted inside the container, used as the
    # box default (when not set explicitly for a variable)
    mount_path: str | None = None
    default: bool = False

class TrialInput(BaseModel):
    name: str
    data: object
    datatype: DATATYPES
    content_type: str | None = None
    shape: list[int] | int | None = None


class TrialOutputVar(BaseModel):
    name: str
    data: str | None = None
    datatype: OUTPUT_DATATYPES | None = None
    content_type: str | None = None

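Editor's note: these two models form the body of CreateTrialRequest (defined near the end of this file). A minimal sketch of building one, with invented variable names and values:

# Illustrative only: names and data are made up.
request = CreateTrialRequest(
    inputs=[TrialInput(name='threshold', data=[0.5], datatype='FP32', shape=1)],
    output_vars=[TrialOutputVar(name='report', datatype='FILE',
                                content_type='text/csv')],
)
payload = request.model_dump_json()  # ready to POST to the trials endpoint
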
class EntryPoint(BaseModel):
    cmd: list[str]


class BoxMount(BaseModel):
    box_name: str | None = None
    mount_path: str | None = None
    in_box_path: str | None = None
    read_only: bool | None = None


class StartStageVar(BaseModel):
    name: str
    box_mount: BoxMount
    content_type: str | None = None
    shape: str | None = None


class StartStageImage(BaseModel):
    name: str
    registry_name: str
    credentials_name: str


class StartEnvVar(BaseModel):
    name: str
    value: str


class StartEnvFrom(BaseModel):
    key: str
    body: str


class StartConfigFromItem(BaseModel):
    key: str
    mount_sub_path: str


class StartConfigFrom(BaseModel):
    mount_path: str
    source_kind: str
    source_name: str
    items: list[StartConfigFromItem] | None = None


class StartVarBoxPath(BaseModel):
    in_box_path: str
    mount_path: str | None = None
    box: ConnectedBox


class TrialVar(BaseModel):
    name: str
    box_name: str
    data: str
    datatype: str | None = None  # datatype is required, but remains optional for backward compatibility
    content_type: str | None = None


class TrialStartVar(BaseModel):
    name: str
    box_path: StartVarBoxPath
    files_location: str | None = None
    datatype: str
    content_type: str | None = None
    shape: int | list[int] | None = None


class StartStageCompResources(BaseModel):
    cpu: str
    memory: str
    gpu: str | None = None


class TrialStartStage(BaseModel):
    name: str
    image: StartStageImage | None = None
    inputs: list[StartStageVar]
    outputs: list[StartStageVar]
    entry_point: EntryPoint | None = None
    env_vars: list[StartEnvVar] | None = None
    env_from: list[StartEnvFrom] | None = None
    config_from: list[StartConfigFrom] | None = None
    comp_resources: StartStageCompResources


class TrialVars(BaseModel):
    inputs: list[TrialVar]
    outputs: list[TrialVar]


class TrialStartVars(BaseModel):
    inputs: list[TrialStartVar]
    outputs: list[TrialStartVar]
    internals: list[TrialStartVar]


class StatusCondition(BaseModel):
    type: str
    condition_status: Literal["True", "False", "Unknown"]
    last_transition_time: str
    message: str = ""
    reason: str = ""
    stage: str | None = None


class TrialStatus(BaseModel):
    conditions: list[StatusCondition]
    status_date_time: str | None = None


class TrialStageOutput(BaseModel):
    stage_name: str
    output_vars: list[str]


class ValidationVars(BaseModel):
    trial_outputs: list[TrialStageOutput]
    next_pipeline: str | None = None


class ValidationInput(BaseModel):
    start_request: BoxMount
    validation_vars: BoxMount
    internal_api_secret: str


class TrialStart(BaseModel):
    app_name: str
    user_id: str
    tracking_id: str
    pipeline_name: str
    pipeline_version: str
    next_pipeline_name: str | None
    stages: list[TrialStartStage]
    vars: TrialStartVars
    connected_boxes: list[ConnectedBox]
    image_registries_creds: list[str]
    status: TrialStatus
    validation_input: ValidationInput


class TrialLinks(BaseModel):
    self: Link
    pipeline: Link
    next_trial: Link | None


class PipelineTrial(BaseModel):
    model_config = ConfigDict(from_attributes=True)

    app_name: str
    user_id: str
    tracking_id: str
    pipeline_name: str
    pipeline_version: str
    next_pipeline_name: str | None
    next_tracking_id: str | None
    vars: TrialVars | None
    status: TrialStatus

    links: TrialLinks | None = Field(alias='_links', default=None)


class CreateTrialRequest(BaseModel):
    inputs: list[TrialInput] | None = None
    output_vars: list[TrialOutputVar] | None = None


class CreateStatusConditionRequest(BaseModel):
    type: str
    message: str = ""
    reason: str = ""
    transition_time: AwareDatetime | None = None
    stage: str | None = None


class PipelineVersionResponse(BaseModel):
    openapi_spec: dict | None
    license: str | None
    pipeline_version: str = '1'


class PipelineLinks(BaseModel):
    self: Link
    trials: Link
    version: Link


class PipelineResponse(BaseModel):
    name: str
    links: PipelineLinks = Field(alias='_links')
    definition: dict


class PipelinesLinks(BaseModel):
    self: Link
    next: Link | None


class EmbeddedPipeline(BaseModel):
    name: str
    links: PipelineLinks = Field(alias='_links')


class PipelinesEmbedded(BaseModel):
    pipelines: list[EmbeddedPipeline]


class PipelinesResponse(BaseModel):
    links: PipelinesLinks = Field(alias='_links')
    remaining_item_count: int
    embedded: PipelinesEmbedded = Field(alias='_embedded')


class TrialsLinks(BaseModel):
    self: Link
    next: Link | None


class EmbeddedTrial(BaseModel):
    model_config = ConfigDict(from_attributes=True)

    app_name: str
    user_id: str
    tracking_id: str
    pipeline_name: str
    pipeline_version: str
    next_pipeline_name: str | None
    next_tracking_id: str | None
    status: TrialStatus

    links: TrialLinks | None = Field(alias='_links', default=None, validation_alias='links')


class TrialsEmbedded(BaseModel):
    trials: list[EmbeddedTrial]


class TrialsResponse(BaseModel):
    links: TrialsLinks = Field(alias='_links')
    remaining_item_count: int
    total_item_count: int
    embedded: TrialsEmbedded = Field(alias='_embedded')

281  controller/src/exp_pipeline/stage.py  (new file)
@@ -0,0 +1,281 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: ExperimentPipeline
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
import json
import logging
import os
from pathlib import Path

from fastapi import HTTPException

from exp_pipeline.schema import BoxMount, StartStageVar, TrialStartStage, StartStageImage, EntryPoint, StartEnvVar, \
    StartStageCompResources, StartEnvFrom, StartConfigFrom, StartConfigFromItem
from exp_pipeline.schema import TrialStartVar
from parse import extract_domain

_DEFAULT_MOUNT_PATH = '/unip'


SECRET_SUFFIX = '-cred'

_STAGE_RESOURCE_CPU_DEFAULT = '500m'
_STAGE_RESOURCE_MEMORY_DEFAULT = '256M'

logger = logging.getLogger(__name__)

def _construct_stage_image(image_section):
    image_name = image_section['existingImageName']
    image_registry_name = extract_domain(image_name)
    stage_image = StartStageImage(name=image_name,
                                  registry_name=image_registry_name,
                                  credentials_name=image_registry_name + SECRET_SUFFIX)
    return stage_image


def _construct_entry_point(entry_point_section):
    return EntryPoint(cmd=entry_point_section['cmd'])


def _construct_var_box_mount(stage_var_section: dict,
                             start_var: TrialStartVar,
                             read_only: bool) -> BoxMount:
    var_name = start_var.name
    start_var_box = start_var.box_path.box

    if 'path' in stage_var_section:
        mount_path = stage_var_section['path']
    elif start_var.box_path.mount_path is not None:
        mount_path = start_var.box_path.mount_path
    elif start_var_box.mount_path is not None:
        mount_path = os.path.join(start_var_box.mount_path, var_name)
    else:
        mount_path = _DEFAULT_MOUNT_PATH + '/' + var_name

    vbm = BoxMount()
    vbm.box_name = start_var_box.name
    vbm.mount_path = mount_path
    vbm.in_box_path = start_var.box_path.in_box_path
    vbm.read_only = read_only

    return vbm

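Editor's note: the mount path is resolved with a fixed precedence: an explicit 'path' in the stage section wins, then the variable's mount_path, then the box default joined with the variable name, then '/unip/<name>'. A small illustration with invented names (assumes the schema.py models are importable and that pydantic allows plain attribute assignment, as BoxMount() above relies on):

from exp_pipeline.schema import ConnectedBox, StartVarBoxPath, TrialStartVar

# Hypothetical box and variable; names and paths are made up.
box = ConnectedBox(name='workdir', mode='rw', pvc_name='pvc-workdir',
                   mount_path='/data')
var = TrialStartVar(name='dataset', datatype='FILE',
                    box_path=StartVarBoxPath(in_box_path='u1/dataset/', box=box))

vbm = _construct_var_box_mount(stage_var_section={}, start_var=var,
                               read_only=True)
# No stage 'path' and no per-variable mount_path, so the box default applies:
assert vbm.mount_path == '/data/dataset'
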
def _construct_input_var_box_mount(stage_var_section: dict,
                                   start_var: TrialStartVar) -> BoxMount:
    vbm = _construct_var_box_mount(stage_var_section=stage_var_section,
                                   start_var=start_var,
                                   read_only=True)
    return vbm


def _construct_output_var_box_mount(stage_var_section: dict,
                                    start_var: TrialStartVar) -> BoxMount:
    vbm = _construct_var_box_mount(stage_var_section=stage_var_section,
                                   start_var=start_var,
                                   read_only=False)
    return vbm


def _get_stage_var_shape(start_var: TrialStartVar):
    shape = None
    if start_var.shape:
        if isinstance(start_var.shape, int):
            shape = str(start_var.shape)
        else:
            shape = json.dumps(start_var.shape)
    return shape

def _construct_output_stage_var(stage_name: str,
                                stage_var_section: dict,
                                output_vars_by_name: dict[str, TrialStartVar],
                                internal_vars_by_name: dict[str, TrialStartVar]) -> StartStageVar | None:
    var_name = stage_var_section['name']

    if var_name in output_vars_by_name:
        start_var = output_vars_by_name[var_name]
    elif var_name in internal_vars_by_name:
        start_var = internal_vars_by_name[var_name]
        if start_var.box_path.box.dataset_ref_box_name:
            logger.info(f'Stage {stage_name} output var {var_name} cannot be a dataset, skipped')
            return None
    else:
        logger.info(f'Stage {stage_name} output var {var_name} not passed')
        return None

    var_box_mount = _construct_output_var_box_mount(stage_var_section=stage_var_section,
                                                    start_var=start_var)

    logger.info(f'Stage: {stage_name}; output var: {var_name}; mount path: {var_box_mount}')

    shape = _get_stage_var_shape(start_var)

    sv = StartStageVar(name=var_name, box_mount=var_box_mount, content_type=start_var.content_type, shape=shape)

    return sv


def _construct_input_stage_var(stage_name: str,
                               stage_var_section: dict,
                               input_vars_by_name: dict[str, TrialStartVar],
                               internal_vars_by_name: dict[str, TrialStartVar]) -> StartStageVar | None:
    var_name = stage_var_section['name']

    if var_name in input_vars_by_name:
        start_var = input_vars_by_name[var_name]
    elif var_name in internal_vars_by_name:
        start_var = internal_vars_by_name[var_name]
    else:
        logger.info(f'Stage {stage_name} input var {var_name} not passed')
        return None

    var_box_mount = _construct_input_var_box_mount(stage_var_section=stage_var_section,
                                                   start_var=start_var)

    logger.info(f'Stage: {stage_name}; input var: {var_name}; mount path: {var_box_mount}')

    shape = _get_stage_var_shape(start_var)

    sv = StartStageVar(name=var_name, box_mount=var_box_mount, content_type=start_var.content_type, shape=shape)

    return sv

def _check_mounts_are_not_prefix_each_other(mount_paths: list[str]):
    smp = [Path(mp) for mp in sorted(mount_paths)]
    for index in range(len(smp) - 1):
        mp = smp[index]
        next_mp = smp[index + 1]
        if mp == next_mp or mp in next_mp.parents:
            return False
    return True


def _check_mounts(input_vars: list[StartStageVar], output_vars: list[StartStageVar]):
    all_vars = input_vars + output_vars
    if not _check_mounts_are_not_prefix_each_other([v.box_mount.mount_path for v in all_vars]):
        logger.error('Some mount paths are duplicates or prefixes of others')
        # paths = [v.box_mount.mount_path for v in all_vars]
        # logger.debug(f'Mount paths:\n{paths}')
        raise HTTPException(status_code=503, detail="Service unavailable")

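Editor's note: lexicographic sorting places a path right before its extensions in the common case, so the check above only compares adjacent pairs. For example:

assert _check_mounts_are_not_prefix_each_other(['/unip/a', '/unip/b']) is True
assert _check_mounts_are_not_prefix_each_other(['/unip/a/b', '/unip/a']) is False  # prefix
assert _check_mounts_are_not_prefix_each_other(['/unip/a', '/unip/a']) is False    # duplicate
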
def _construct_env_vars(spec) -> list[StartEnvVar]:
    env_section = spec.get('env')
    envs = []
    if env_section:
        envs = [StartEnvVar(name=env_var['name'], value=env_var['value']) for env_var in env_section]
    return envs


def _construct_stage_env_from(spec) -> list[StartEnvFrom]:
    res = []
    env_from_section = spec.get('envFrom')
    if env_from_section is None:
        return res

    # elem: {'configMapRef': {'name': 'env-configmap'}}
    for elem in env_from_section:
        # key_and_body: [('configMapRef', {'name': 'env-configmap'})]
        key_and_body = list(elem.items())
        # key: "configMapRef"
        key = key_and_body[0][0]
        # body: {'name': 'env-configmap'}
        body = key_and_body[0][1]
        # body_str: "name: env-configmap"
        body_str = str(body).replace('{', '').replace('}', '').replace('\'', '')
        item = StartEnvFrom(key=key, body=body_str)
        res.append(item)
    return res

def _construct_stage_config_from(spec) -> list[StartConfigFrom]:
    res = []
    config_from_section = spec.get('configFrom')
    if config_from_section is None:
        return res

    mount_paths = set()

    for elem in config_from_section:
        if 'configMap' in elem:
            source_kind = 'ConfigMap'
            source_section = elem['configMap']
        elif 'secret' in elem:
            source_kind = 'Secret'
            source_section = elem['secret']
        else:
            logger.error(f'Unknown source kind of {elem}')
            raise HTTPException(status_code=503, detail="Service unavailable")
        source_name = source_section['name']
        mount_path = elem['path']
        if mount_path in mount_paths:
            logger.error(f'Duplicate path "{mount_path}" in configFrom')
            raise HTTPException(status_code=503, detail="Service unavailable")
        mount_paths.add(mount_path)
        items = None
        if 'items' in source_section:
            items = [StartConfigFromItem(key=i['key'], mount_sub_path=i['path']) for i in source_section['items']]
        config_from = StartConfigFrom(mount_path=mount_path,
                                      source_kind=source_kind,
                                      source_name=source_name,
                                      items=items)
        res.append(config_from)

    return res


def _construct_comp_resources(spec) -> StartStageCompResources:
    if 'resourceLimits' not in spec:
        return StartStageCompResources(cpu=_STAGE_RESOURCE_CPU_DEFAULT,
                                       memory=_STAGE_RESOURCE_MEMORY_DEFAULT)
    resource_limits = spec['resourceLimits']
    cpu = resource_limits.get('cpu', _STAGE_RESOURCE_CPU_DEFAULT)
    memory = resource_limits.get('memory', _STAGE_RESOURCE_MEMORY_DEFAULT)
    gpu = resource_limits.get('gpu')
    return StartStageCompResources(cpu=cpu, memory=memory, gpu=gpu)

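Editor's note: for reference, a stage section shaped the way the configFrom parser expects. The names are invented; only 'configMap'/'secret', 'path', and the optional 'items' entries with 'key'/'path' are read.

# Hypothetical 'configFrom' stage section as consumed above:
example_stage_section = {
    'configFrom': [
        {'configMap': {'name': 'app-config'}, 'path': '/etc/app'},
        {'secret': {'name': 'app-keys',
                    'items': [{'key': 'token', 'path': 'token.txt'}]},
         'path': '/etc/keys'},
    ]
}
parsed = _construct_stage_config_from(example_stage_section)
# parsed[1].items[0].mount_sub_path == 'token.txt'
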
def construct_start_stage(
        stage_section: dict,
        input_vars_by_name: dict[str, TrialStartVar],
        output_vars_by_name: dict[str, TrialStartVar],
        internal_vars_by_name: dict[str, TrialStartVar]
) -> TrialStartStage:
    name = stage_section['name']

    stage_image = _construct_stage_image(stage_section['image'])
    stage_inputs = [_construct_input_stage_var(stage_name=name,
                                               stage_var_section=s,
                                               input_vars_by_name=input_vars_by_name,
                                               internal_vars_by_name=internal_vars_by_name)
                    for s in stage_section['inputs']]
    stage_inputs = [i for i in stage_inputs if i is not None]

    stage_outputs = [_construct_output_stage_var(
                         stage_name=name,
                         stage_var_section=s,
                         output_vars_by_name=output_vars_by_name,
                         internal_vars_by_name=internal_vars_by_name)
                     for s in stage_section['outputs']]
    stage_outputs = [o for o in stage_outputs if o is not None]

    _check_mounts(stage_inputs, stage_outputs)

    entry_point = _construct_entry_point(stage_section['entryPoint']) if 'entryPoint' in stage_section else None
    env_vars = _construct_env_vars(stage_section)
    env_from = _construct_stage_env_from(stage_section)
    config_from = _construct_stage_config_from(stage_section)

    comp_resources = _construct_comp_resources(stage_section)

    stage = TrialStartStage(name=name, image=stage_image,
                            inputs=stage_inputs, outputs=stage_outputs, entry_point=entry_point,
                            env_vars=env_vars, comp_resources=comp_resources,
                            env_from=env_from, config_from=config_from)
    return stage

0  controller/src/exp_pipeline/storage/__init__.py  (new file)
13  controller/src/exp_pipeline/storage/config.py  (new file)
@@ -0,0 +1,13 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: ExperimentPipeline
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
import os

UNIP_DATABASE_PIPELINES_USER = os.getenv('UNIP_DATABASE_PIPELINES_USER')
UNIP_DATABASE_PIPELINES_PASSWORD = os.getenv('UNIP_DATABASE_PIPELINES_PASSWORD')
UNIP_DATABASE_PIPELINES_HOST = os.getenv('UNIP_DATABASE_PIPELINES_HOST')
# keep the default a string so the value type does not depend on whether
# the environment variable is set
UNIP_DATABASE_PIPELINES_PORT = os.getenv('UNIP_DATABASE_PIPELINES_PORT', '5432')
UNIP_DATABASE_PIPELINES_DB = os.getenv('UNIP_DATABASE_PIPELINES_DB')

37  controller/src/exp_pipeline/storage/db.py  (new file)
@@ -0,0 +1,37 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: ExperimentPipeline
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker

from exp_pipeline.storage.config import UNIP_DATABASE_PIPELINES_USER, UNIP_DATABASE_PIPELINES_PASSWORD, \
    UNIP_DATABASE_PIPELINES_DB, UNIP_DATABASE_PIPELINES_PORT, \
    UNIP_DATABASE_PIPELINES_HOST
from exp_pipeline.storage.models import Base

SQLALCHEMY_DATABASE_URL = \
    f'postgresql+asyncpg://{UNIP_DATABASE_PIPELINES_USER}:{UNIP_DATABASE_PIPELINES_PASSWORD}@' \
    f'{UNIP_DATABASE_PIPELINES_HOST}:{UNIP_DATABASE_PIPELINES_PORT}/{UNIP_DATABASE_PIPELINES_DB}'

async_engine = create_async_engine(SQLALCHEMY_DATABASE_URL)


AsyncDbSession = async_sessionmaker(bind=async_engine, expire_on_commit=False)


async def get_db():
    async with AsyncDbSession() as session:
        yield session


async def recreate_db():
    async with async_engine.begin() as async_conn:
        await async_conn.run_sync(Base.metadata.drop_all)
        await async_conn.run_sync(Base.metadata.create_all)


async def init_db():
    async with async_engine.begin() as async_conn:
        await async_conn.run_sync(Base.metadata.create_all)

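Editor's note: get_db is an async generator, which is the shape FastAPI expects for a dependency. A minimal sketch of wiring it into a route; the route and query below are invented for illustration and are not part of the commit:

from fastapi import APIRouter, Depends
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from exp_pipeline.storage.db import get_db
from exp_pipeline.storage.models import PipelineTrialModel

router = APIRouter()


@router.get('/trials/{tracking_id}')  # hypothetical route
async def read_trial(tracking_id: str, db: AsyncSession = Depends(get_db)):
    result = await db.execute(
        select(PipelineTrialModel).where(PipelineTrialModel.tracking_id == tracking_id))
    return result.scalar_one_or_none()
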
28  controller/src/exp_pipeline/storage/models.py  (new file)
@@ -0,0 +1,28 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: ExperimentPipeline
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
from sqlalchemy import Integer, String
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import DeclarativeBase, mapped_column


class Base(DeclarativeBase):
    pass


class PipelineTrialModel(Base):
    __tablename__ = "pipeline_trials"

    id = mapped_column(Integer, primary_key=True)
    user_id = mapped_column(String, nullable=False)
    app_name = mapped_column(String, nullable=False)
    tracking_id = mapped_column(String, nullable=False, index=True, unique=True)
    pipeline_name = mapped_column(String, nullable=False)
    next_pipeline_name = mapped_column(String, nullable=True)
    next_tracking_id = mapped_column(String, nullable=True)
    pipeline_version = mapped_column(String, nullable=False)
    vars = mapped_column(JSONB, nullable=True)
    status = mapped_column(JSONB, nullable=False)

508  controller/src/exp_pipeline/var.py  (new file)
@@ -0,0 +1,508 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: ExperimentPipeline
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
import json
import logging
import posixpath
import urllib.parse
import uuid
from functools import reduce
from operator import mul

from fastapi import HTTPException
from httpx import AsyncClient

from exp_pipeline.box import get_path_inside_box, get_default_box
from exp_pipeline.config import UNIP_FILES_API
from exp_pipeline.schema import ConnectedBox, TrialInput, TrialOutputVar, TrialStartVar, StartVarBoxPath, \
    DATATYPE_TO_CONTENT_TYPE
from location import parse_location


UNIP_FILES_USER_PASSWORD = '**'

logger = logging.getLogger(__name__)


def _reshape(lst: list, shape: list[int]):
    if len(shape) == 1:
        return lst
    n = reduce(mul, shape[1:])
    return [_reshape(lst[i * n:(i + 1) * n], shape[1:]) for i in range(len(lst) // n)]

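Editor's note: _reshape nests a flat list according to shape, recursing on the tail of the shape so the innermost dimension comes last. For example:

# A flat list of 6 values reshaped to 2 rows of 3:
assert _reshape(list(range(6)), [2, 3]) == [[0, 1, 2], [3, 4, 5]]
# Three dimensions work the same way:
assert _reshape(list(range(8)), [2, 2, 2]) == [[[0, 1], [2, 3]], [[4, 5], [6, 7]]]
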
async def _save_data_to_file(http_client: AsyncClient,
                             app_name: str,
                             user_id: str,
                             s3_box_name: str,
                             input_data: object,
                             shape: list[int],
                             content_type: str) -> str:
    data = input_data
    if isinstance(data, list):
        data = _reshape(data, shape)

    # in the current implementation everything is saved as JSON text
    str_data = json.dumps(data)
    content_type = 'application/json'

    # create a file in the given file group;
    # if the file group does not exist, the files API creates it
    url_prefix = f'{app_name}/files/{s3_box_name}/'
    create_file_url = urllib.parse.urljoin(UNIP_FILES_API,
                                           url_prefix)

    res = await http_client.post(create_file_url, auth=(user_id, UNIP_FILES_USER_PASSWORD))
    if res.status_code != 201:
        logger.error(f'Files API request error (create file); POST {create_file_url}; '
                     f'Returned code: {res.status_code}')
        logger.error(f'Response content {res.content}')
        raise HTTPException(status_code=503, detail="Service unavailable")

    res_data = res.json()
    put_url = res_data['presigned_put_url']
    new_file_name: str = res_data['name']
    if new_file_name.startswith(url_prefix):
        new_file_name = new_file_name[len(url_prefix):]
    path = new_file_name
    binary_data = str_data.encode('utf-8')
    # Content-Length must be the byte length of the encoded payload
    data_len = len(binary_data)

    headers = {'Content-Type': content_type, 'Content-Length': str(data_len)}
    res = await http_client.put(put_url, content=binary_data, headers=headers,
                                auth=(user_id, UNIP_FILES_USER_PASSWORD))

    if res.status_code != 201 and res.status_code != 200:
        logger.error(f'S3 request error (put file content); PUT {put_url}; '
                     f'Returned code: {res.status_code}')
        logger.error(f'Response content {res.content}')
        raise HTTPException(status_code=503, detail="Service unavailable")

    return path

def _get_var_box(user_id: str,
                 pipeline_name: str,
                 connected_boxes: list[ConnectedBox],
                 box_section: dict | None
                 ) -> ConnectedBox:
    connected_boxes_by_name = {cb.name: cb for cb in connected_boxes}
    if box_section and 'name' in box_section:
        name = box_section['name']
        if name not in connected_boxes_by_name:
            logger.error(f'Connected box {name} not specified in pipeline {pipeline_name}')
            raise HTTPException(status_code=503, detail="Service unavailable")
        return connected_boxes_by_name[name]
    return get_default_box(user_id, pipeline_name, connected_boxes)

async def _get_input_var_location(http_client: AsyncClient,
                                  app_name: str,
                                  user_id: str,
                                  var_box: ConnectedBox,
                                  passed_var_input: TrialInput) -> tuple[str, str]:
    if passed_var_input.datatype in {"FILE", "WEBSITE"}:
        if not isinstance(passed_var_input.data, str):
            raise HTTPException(status_code=400, detail=f"Passed input var {passed_var_input.name} "
                                                        f"datatype is {passed_var_input.datatype},"
                                                        f" but data is not an instance of 'str'")
        if passed_var_input.datatype == 'FILE' and not passed_var_input.content_type:
            raise HTTPException(status_code=400, detail=f"Passed input var {passed_var_input.name} "
                                                        f"datatype is FILE, content_type is required")
        if passed_var_input.datatype == 'WEBSITE':
            passed_var_input.content_type = DATATYPE_TO_CONTENT_TYPE['WEBSITE']

        file_group_name, file_name = parse_location(passed_var_input.data)
        if not file_group_name:
            raise HTTPException(status_code=400,
                                detail=f"At least a file group must be passed for input of var {passed_var_input.name}, "
                                       f"given: {passed_var_input.data}")
        if not file_name:
            # parse_location returns values that neither start nor end with '/';
            # if only a file group name was passed, the path must end with '/' -
            # a folder is mounted;
            path = file_group_name + '/'
        else:
            # if both a file group name and a file name were passed, join them -
            # a file is mounted;
            path = posixpath.join(file_group_name, file_name)
        return path, passed_var_input.content_type
    else:
        if not passed_var_input.data:
            raise HTTPException(status_code=400, detail=f"Passed input var {passed_var_input.name} "
                                                        f"datatype is {passed_var_input.datatype}, but data not passed")
        # a temporary file is created in a temporary file group and its path
        # is returned - a file is mounted;
        path = await _save_data_to_file(http_client=http_client,
                                        app_name=app_name,
                                        user_id=user_id,
                                        s3_box_name=var_box.s3_box_name,
                                        input_data=passed_var_input.data,
                                        shape=passed_var_input.shape,
                                        content_type=passed_var_input.content_type)
        return path, 'application/json'

def _get_input_var_datatype(passed_var_input: TrialInput):
    if passed_var_input.datatype == 'WEBSITE':
        return 'WEBSITE'
    return 'FILE'


def _get_input_var_in_box_path(user_id: str,
                               input_location: str | None) -> str:
    in_box_path = get_path_inside_box(user_id, input_location)
    return in_box_path


def _get_var_mount_path(var_section: dict):
    return var_section.get('path', None)

async def _construct_input_var(http_client: AsyncClient,
                               app_name: str,
                               user_id: str,
                               pipeline_name: str,
                               connected_boxes: list[ConnectedBox],
                               var_section: dict,
                               var_input: TrialInput) -> TrialStartVar:
    var_name = var_section['name']

    box_section = None
    if 'mountFrom' in var_section:
        mount_from_section = var_section['mountFrom']
        if 'box' in mount_from_section:
            box_section = mount_from_section['box']

    var_box = _get_var_box(user_id=user_id,
                           pipeline_name=pipeline_name,
                           connected_boxes=connected_boxes,
                           box_section=box_section)
    if var_box.dataset_ref_box_name:
        logger.info(f'Input var {var_name} connected box {var_box.name} cannot be a dataset')
        raise HTTPException(status_code=503, detail="Service unavailable")

    input_location, content_type = await _get_input_var_location(http_client=http_client,
                                                                 app_name=app_name,
                                                                 user_id=user_id,
                                                                 var_box=var_box,
                                                                 passed_var_input=var_input)
    datatype = _get_input_var_datatype(passed_var_input=var_input)
    logger.info(f'Input var: {var_name}; input location: {input_location}')

    in_box_path = _get_input_var_in_box_path(user_id=user_id, input_location=input_location)
    mount_path = _get_var_mount_path(var_section=var_section)
    var_box_path = StartVarBoxPath(in_box_path=in_box_path, box=var_box, mount_path=mount_path)

    sv = TrialStartVar(name=var_name, box_path=var_box_path, files_location=input_location,
                       datatype=datatype, content_type=content_type, shape=var_input.shape)

    return sv

async def _create_file_group(http_client: AsyncClient,
                             app_name: str,
                             user_id: str,
                             box_name: str,
                             location: str):
    url_prefix = f'/{app_name}/files/{box_name}'
    url_prefix = posixpath.join(url_prefix, location)
    create_fg_url = urllib.parse.urljoin(UNIP_FILES_API, url_prefix)

    res = await http_client.put(create_fg_url, auth=(user_id, UNIP_FILES_USER_PASSWORD))
    if res.status_code != 200:
        logger.error(f'Files API request error (create file-group); PUT {create_fg_url}; '
                     f'Returned code: {res.status_code}')
        logger.error(f'Response content {res.content}')
        raise HTTPException(status_code=503, detail="Service unavailable")

async def _get_output_var_location(http_client: AsyncClient,
                                   app_name: str,
                                   user_id: str,
                                   var_box: ConnectedBox,
                                   passed_var: TrialOutputVar | None):
    passed = passed_var.data if passed_var else None
    file_group_name, file_name = parse_location(passed)

    if file_name:
        # output variables must always be file groups
        raise HTTPException(status_code=400,
                            detail=f'File name must NOT be specified for output var {passed_var.name}; '
                                   f'given location {passed}')

    if passed_var.datatype == 'WEBSITE':
        passed_var.content_type = DATATYPE_TO_CONTENT_TYPE['WEBSITE']

    if not file_group_name:
        # if an empty location was passed, a unique file group name is generated,
        # since the Files API does not support generating names when creating
        # empty file groups;
        path = str(uuid.uuid4()) + '/'
    else:
        # if a file group was passed, a trailing '/' must be appended,
        # since parse_location always returns values that neither start
        # nor end with '/'
        path = file_group_name + '/'

    await _create_file_group(http_client, app_name, user_id, var_box.s3_box_name, path)

    return path, passed_var.content_type

def _get_output_var_in_box_path(user_id: str,
                                output_location: str | None) -> str:
    in_box_path = get_path_inside_box(user_id, output_location)
    return in_box_path


def _get_output_var_datatype(passed_var: TrialOutputVar):
    if not passed_var.datatype:
        return 'FILE'
    return passed_var.datatype

async def _construct_output_var(http_client: AsyncClient,
                                app_name: str,
                                user_id: str,
                                pipeline_name: str,
                                connected_boxes: list[ConnectedBox],
                                var_section: dict,
                                trial_output_var: TrialOutputVar) -> TrialStartVar:
    var_name = var_section['name']

    box_section = None
    if 'mountFrom' in var_section:
        mount_from_section = var_section['mountFrom']
        if 'box' in mount_from_section:
            box_section = mount_from_section['box']

    var_box = _get_var_box(user_id=user_id,
                           pipeline_name=pipeline_name,
                           connected_boxes=connected_boxes,
                           box_section=box_section)
    if var_box.dataset_ref_box_name:
        logger.info(f'Output var {var_name} connected box {var_box.name} cannot be a dataset')
        raise HTTPException(status_code=503, detail="Service unavailable")

    output_location, content_type = await _get_output_var_location(http_client=http_client,
                                                                   app_name=app_name,
                                                                   user_id=user_id,
                                                                   var_box=var_box,
                                                                   passed_var=trial_output_var)
    datatype = _get_output_var_datatype(passed_var=trial_output_var)

    logger.info(f'Output var: {var_name}; output location: {output_location}')

    in_box_path = _get_output_var_in_box_path(user_id=user_id, output_location=output_location)
    mount_path = _get_var_mount_path(var_section=var_section)
    var_box_path = StartVarBoxPath(in_box_path=in_box_path, box=var_box, mount_path=mount_path)

    sv = TrialStartVar(name=var_name, box_path=var_box_path, files_location=output_location,
                       datatype=datatype, content_type=content_type)

    return sv

async def _get_internal_var_location(http_client: AsyncClient,
                                     app_name: str,
                                     user_id: str,
                                     var_box: ConnectedBox,
                                     var_section: dict):
    var_name = var_section['name']

    if var_box.dataset_ref_box_name:
        logger.info(f'For internal var {var_name} a dataset is used')
        return None

    box_section = None
    if 'mountFrom' in var_section:
        mount_from_section = var_section['mountFrom']
        if 'box' in mount_from_section:
            box_section = mount_from_section['box']

    # if boxPath is set, no location is created
    if box_section and ('boxPath' in box_section):
        logger.info(f'For internal var {var_name} boxPath is used')
        return None

    # otherwise an empty location must be created; a unique file group name is
    # generated, since the Files API does not support generating names when
    # creating empty file groups;
    path = str(uuid.uuid4()) + '/'

    await _create_file_group(http_client, app_name, user_id, var_box.s3_box_name, path)

    logger.info(f'Internal var: {var_name}; internal location: {path}')

    return path

def _get_internal_var_in_box_path(user_id: str,
                                  var_box: ConnectedBox,
                                  var_section: dict,
                                  internal_location: str | None):
    var_name = var_section['name']

    box_section = None
    if 'mountFrom' in var_section:
        mount_from_section = var_section['mountFrom']
        if 'box' in mount_from_section:
            box_section = mount_from_section['box']

    static_box_path = var_box.dataset_ref_box_name or (box_section and ('boxPath' in box_section))

    if static_box_path and internal_location:
        logger.error(f'Bug. Cannot use internal var {var_name} location'
                     f' if boxPath or mountDataset is defined in the pipeline specification')
        raise HTTPException(status_code=500,
                            detail='Internal server error')
    if not static_box_path and not internal_location:
        logger.error(f'Bug. Internal var location not passed and neither boxPath nor mountDataset specified '
                     f'for internal var {var_name}.')
        raise HTTPException(status_code=500,
                            detail='Internal server error')

    in_box_path = None
    # either boxPath and mountDataset are not specified in the pipeline
    # specification, an empty file group is created, and internal_location
    # holds its value;
    if internal_location:
        in_box_path = get_path_inside_box(user_id, internal_location)
    # or boxPath or mountDataset is specified in the specification;
    if static_box_path:
        if var_box.dataset_ref_box_name:
            in_box_path = ""
        elif box_section and ('boxPath' in box_section):
            in_box_path = box_section['boxPath']
    if in_box_path is not None and in_box_path.startswith('/'):
        in_box_path = in_box_path.lstrip('/')

    return in_box_path


def _get_internal_var_datatype():
    return 'FILE'

async def _construct_internal_var(http_client: AsyncClient,
                                  app_name: str,
                                  user_id: str,
                                  pipeline_name: str,
                                  connected_boxes: list[ConnectedBox],
                                  var_section: dict) -> TrialStartVar:
    var_name = var_section['name']

    box_section = None
    if 'mountFrom' in var_section:
        mount_from_section = var_section['mountFrom']
        if 'box' in mount_from_section:
            box_section = mount_from_section['box']

    var_box = _get_var_box(user_id=user_id,
                           pipeline_name=pipeline_name,
                           connected_boxes=connected_boxes,
                           box_section=box_section)

    internal_location = await _get_internal_var_location(http_client=http_client,
                                                         app_name=app_name,
                                                         user_id=user_id,
                                                         var_box=var_box,
                                                         var_section=var_section)
    datatype = _get_internal_var_datatype()

    in_box_path = _get_internal_var_in_box_path(user_id=user_id,
                                                var_box=var_box,
                                                var_section=var_section,
                                                internal_location=internal_location)
    mount_path = _get_var_mount_path(var_section=var_section)
    var_box_path = StartVarBoxPath(in_box_path=in_box_path, box=var_box, mount_path=mount_path)

    sv = TrialStartVar(name=var_name, box_path=var_box_path, files_location=internal_location,
                       datatype=datatype)

    return sv

def _determine_start_vars_spec(pipeline_api_spec, pipeline_spec):
    result = input_vars_specs, output_vars_specs, internal_vars_specs = {}, {}, {}

    if 'apiSpec' not in pipeline_api_spec:
        return result

    pipeline_vars = pipeline_spec['vars']
    api_spec = pipeline_api_spec['apiSpec']

    inputs_names = {i['name'] for i in api_spec['inputs']} if 'inputs' in api_spec else set()
    input_vars_specs.update({v['name']: v for v in pipeline_vars if v['name'] in inputs_names})

    outputs_names = {o['name'] for o in api_spec['outputs']} if 'outputs' in api_spec else set()
    output_vars_specs.update({v['name']: v for v in pipeline_vars if v['name'] in outputs_names})

    internal_vars_specs.update({v['name']: v for v in pipeline_vars if v['name'] not in (inputs_names | outputs_names)})

    return result

async def construct_start_vars(http_client: AsyncClient,
|
||||
app_name: str,
|
||||
user_id: str,
|
||||
pipeline_name: str,
|
||||
pipeline_api_spec: dict,
|
||||
pipeline_spec: dict,
|
||||
connected_boxes: list[ConnectedBox],
|
||||
passed_trial_inputs: list[TrialInput],
|
||||
passed_trial_output_vars: list[TrialOutputVar]) \
|
||||
-> tuple[list[TrialStartVar], list[TrialStartVar], list[TrialStartVar]]:
|
||||
input_vars_specs, output_vars_specs, internal_vars_specs = _determine_start_vars_spec(pipeline_api_spec,
|
||||
pipeline_spec)
|
||||
|
||||
input_vars = []
|
||||
# входные данные прошли валидацию, поэтому считаются корректными;
|
||||
# поэтому для каждого переданного входа отыскивается спецификация в ресурсе пайплайна;
|
||||
for i in passed_trial_inputs:
|
||||
# исключение - когда за пайплайном следует другой пайплайн, и переданные данные
|
||||
# относятся к нему;
|
||||
if i.name not in input_vars_specs:
|
||||
continue
|
||||
input_var_spec = input_vars_specs[i.name]
|
||||
input_var = await _construct_input_var(http_client=http_client,
|
||||
app_name=app_name,
|
||||
user_id=user_id,
|
||||
pipeline_name=pipeline_name,
|
||||
connected_boxes=connected_boxes,
|
||||
var_section=input_var_spec,
|
||||
var_input=i)
|
||||
input_vars.append(input_var)
|
||||
|
||||
output_vars = []
|
||||
# аналогично
|
||||
for o in passed_trial_output_vars:
|
||||
if o.name not in output_vars_specs:
|
||||
continue
|
||||
output_var_spec = output_vars_specs[o.name]
|
||||
output_var = await _construct_output_var(http_client=http_client,
|
||||
app_name=app_name,
|
||||
user_id=user_id,
|
||||
pipeline_name=pipeline_name,
|
||||
connected_boxes=connected_boxes,
|
||||
var_section=output_var_spec,
|
||||
trial_output_var=o)
|
||||
output_vars.append(output_var)
|
||||
|
||||
# а в данном случае берутся все внутренние переменные,
|
||||
# которые специфицированы в пайплайне (входные значения для них не передаются)
|
||||
internal_vars = [await _construct_internal_var(http_client=http_client,
|
||||
app_name=app_name,
|
||||
user_id=user_id,
|
||||
pipeline_name=pipeline_name,
|
||||
connected_boxes=connected_boxes,
|
||||
var_section=v)
|
||||
for v in internal_vars_specs.values()]
|
||||
|
||||
return input_vars, output_vars, internal_vars
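The partitioning above keys off variable names only: a pipeline var is an input if its name appears in `apiSpec.inputs`, an output if it appears in `apiSpec.outputs`, and internal otherwise. A minimal sketch with hypothetical spec values (the field names follow the code above; the data is purely illustrative):

```python
# Hypothetical pipeline resources, shaped as _determine_start_vars_spec expects.
pipeline_api_spec = {'apiSpec': {'inputs': [{'name': 'raw'}], 'outputs': [{'name': 'model'}]}}
pipeline_spec = {'vars': [{'name': 'raw'}, {'name': 'model'}, {'name': 'scratch'}]}

inputs, outputs, internals = _determine_start_vars_spec(pipeline_api_spec, pipeline_spec)
assert set(inputs) == {'raw'}         # named in apiSpec.inputs
assert set(outputs) == {'model'}      # named in apiSpec.outputs
assert set(internals) == {'scratch'}  # everything else is internal
```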
0
controller/src/files/__init__.py
Normal file
385
controller/src/files/api.py
Normal file
@@ -0,0 +1,385 @@
# ============================================================
# System: Unified Library, AI Centre, HSE University
# Module: Files
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
import logging
import posixpath
import uuid
from contextlib import asynccontextmanager
from typing import Annotated

import httpx
from fastapi import FastAPI, Response, status, Depends, HTTPException
from kubernetes_asyncio.client import ApiClient
from pydantic import BaseModel

from auth import get_user_id
from files.box import get_box, get_box_name, DataBox, _get_s3_box_creds
from files.logging import configure_logging
from files.s3.client import AsyncS3Client
from kube_config import async_unip_load_kube_config
from location import parse_location

_k8s_api_client: ApiClient | None = None
_async_http_client: httpx.AsyncClient | None = None

configure_logging()
logger = logging.getLogger()


@asynccontextmanager
async def _lifespan(app: FastAPI):
    global _k8s_api_client, _async_http_client
    await async_unip_load_kube_config(logger)
    _k8s_api_client = ApiClient()
    _async_http_client = httpx.AsyncClient(timeout=30)
    yield
    await _async_http_client.aclose()
    await _k8s_api_client.close()


app = FastAPI(lifespan=_lifespan)


class ReadFileResponse(BaseModel):
    name: str
    presigned_get_url: str


class WriteFileResponse(BaseModel):
    name: str
    presigned_put_url: str


class FileGroup(BaseModel):
    name: str
    files: list[ReadFileResponse | WriteFileResponse]


async def _get_async_s3_client(box: DataBox):
    endpoint, access_key, secret_key, region = await _get_s3_box_creds(_k8s_api_client, box)
    async_s3_client = AsyncS3Client(http_client=_async_http_client,
                                    endpoint=endpoint,
                                    access_key=access_key,
                                    secret_key=secret_key,
                                    region=region)
    return async_s3_client


def _get_in_storage_path(box, user_id, file_group_name):
    """
    Constructs the path to the user's file group inside the box.

    Guarantees that the returned string does not start with '/'.
    """
    storage_sub_path = box.storage_sub_path.lstrip('/') if box.storage_sub_path else ''
    return posixpath.join(storage_sub_path, 'users', user_id, 'file_groups', file_group_name)
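# For example (hypothetical values): for a box with storage_sub_path='apps/demo',
# user_id='u1' and file_group_name='fg1/', the resulting in-storage path would be
# 'apps/demo/users/u1/file_groups/fg1/'.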

def _get_path_public_name(app_name: str, box: DataBox, file_group_name: str, file_name: str | None):
    """
    Builds the path to the file group or file that is returned to the user in the response.

    file_group_name is normalized to end with '/'; a trailing '/' on file_name is stripped.
    """

    if not file_group_name.endswith('/'):
        file_group_name = file_group_name + '/'
    if file_name:
        if file_name.endswith('/'):
            file_name = file_name.rstrip('/')
        public_name = posixpath.join(app_name, 'files', box.name, file_group_name, file_name)
    else:
        public_name = posixpath.join(app_name, 'files', box.name, file_group_name)
    return public_name


async def _get_file(app_name: str, user_id: str, box: DataBox, file_group_name: str, file_name: str) \
        -> ReadFileResponse | None:
    """
    Returns the file that corresponds to an existing object in the "directory".

    If the file does not exist, None is returned.

    Requires that file_name does not end with '/'.
    """

    # these checks guarantee that the implementation has no errors related to parsing
    # the passed location and forming the file group and file names
    if not file_name:
        raise ValueError
    if file_name.endswith('/'):
        raise ValueError

    in_storage_path = _get_in_storage_path(box, user_id, file_group_name)

    object_name = posixpath.join(in_storage_path, file_name)
    bucket_name = box.storage_name
    async_s3_client = await _get_async_s3_client(box)
    try:
        _ = await async_s3_client.head_object(bucket_name, object_name)
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code == 404:
            return None
        raise exc

    get_url = async_s3_client.presigned_get_object(bucket_name=bucket_name, object_name=object_name)
    public_name = _get_path_public_name(app_name, box, file_group_name, file_name)

    return ReadFileResponse(name=public_name, presigned_get_url=get_url)


async def _get_file_group_with_files(app_name: str, user_id: str, box: DataBox, file_group_name: str,
                                     file_response_cls) \
        -> FileGroup | None:
    """
    Returns the file group and its contents that correspond to existing objects in the "directory".

    No recursive traversal is performed to build the result.

    If the file group is empty (i.e. corresponds to an empty "directory"),
    a file group with an empty list of files is returned.

    If the file group is missing (not even an empty "directory" exists), None is returned.

    Requires that file_group_name does not end with '/'.
    """

    # these checks guarantee that the implementation has no errors related to parsing
    # the passed location and forming the file group and file names
    if not file_group_name:
        raise ValueError
    if file_group_name.endswith('/'):
        raise ValueError

    # the url in the request must end with '/', otherwise a file is looked up instead of a folder
    file_group_name = file_group_name + '/'
    in_storage_path = _get_in_storage_path(box, user_id, file_group_name)

    bucket_name = box.storage_name
    async_s3_client = await _get_async_s3_client(box)

    s3_objects = await async_s3_client.list_objects(box.storage_name, in_storage_path)

    # empty result if neither an empty file directory nor files with such prefixes exist
    if not s3_objects:
        return None

    files = []
    fg_public_name = _get_path_public_name(app_name, box, file_group_name, None)
    for s3_obj in s3_objects:
        # an empty directory comes back as a single object with is_dir == True; it is not returned;
        # there may be nested empty directories (objects with is_dir == True), they are not returned either;
        if s3_obj.is_dir:
            continue

        rel_path = posixpath.relpath(s3_obj.object_name, in_storage_path)
        public_file_name = fg_public_name + rel_path
        if file_response_cls is ReadFileResponse:
            get_url = async_s3_client.presigned_get_object(bucket_name=bucket_name, object_name=s3_obj.object_name)
            f = ReadFileResponse(name=public_file_name, presigned_get_url=get_url)
        else:
            put_url = async_s3_client.presigned_put_object(bucket_name=bucket_name, object_name=s3_obj.object_name)
            f = WriteFileResponse(name=public_file_name, presigned_put_url=put_url)
        files.append(f)

    fg = FileGroup(name=fg_public_name, files=files)

    return fg


async def _create_path(app_name: str, user_id: str, box: DataBox, file_group_name: str,
                       file_name: str | None) -> WriteFileResponse | FileGroup:
    """
    Creates an empty file or an empty "directory".

    Requires that file_group_name and file_name neither start nor end with '/'.
    """

    # these checks guarantee that the implementation has no errors related to parsing
    # the passed location and forming the file group and file names
    if not file_group_name:
        raise ValueError
    if file_group_name.startswith('/') or file_group_name.endswith('/'):
        raise ValueError
    if file_name:
        if file_name.startswith('/') or file_name.endswith('/'):
            raise ValueError

    in_storage_path = _get_in_storage_path(box, user_id, file_group_name)

    if file_name:
        # if the file name is given, an empty file must be created
        object_name = posixpath.join(in_storage_path, file_name)
    else:
        # otherwise an empty directory must be created,
        # which requires appending '/' to the object name
        if not in_storage_path.endswith('/'):
            in_storage_path = in_storage_path + '/'
        object_name = in_storage_path

    bucket_name = box.storage_name
    async_s3_client = await _get_async_s3_client(box)

    _ = await async_s3_client.put_object(bucket_name, object_name)

    if file_name:
        # a file was created
        public_name = _get_path_public_name(app_name, box, file_group_name, file_name)
        put_url = async_s3_client.presigned_put_object(bucket_name=bucket_name, object_name=object_name)
        return WriteFileResponse(name=public_name, presigned_put_url=put_url)
    else:
        # an empty directory was created
        public_name = _get_path_public_name(app_name, box, file_group_name, None)
        return FileGroup(name=public_name, files=[])


async def _create_file(app_name: str, user_id: str, box: DataBox):
    """
    Creates an empty file with a generated name inside a file group
    with a generated name.
    """
    file_group_name = str(uuid.uuid4())
    file_name = str(uuid.uuid4())
    file = await _create_path(app_name, user_id, box, file_group_name, file_name)
    return file


async def _create_file_inside_file_group(app_name: str, user_id: str, box: DataBox, file_group_name: str):
    """
    Creates an empty file with a generated name inside a file group
    with the given name.
    """
    file_name = str(uuid.uuid4())
    file = await _create_path(app_name, user_id, box, file_group_name, file_name)
    return file


async def _create_file_put_presigned_url(app_name: str, user_id: str, box: DataBox, file_group_name: str,
                                         file_name: str) -> WriteFileResponse:
    in_storage_path = _get_in_storage_path(box, user_id, file_group_name)
    object_name = posixpath.join(in_storage_path, file_name)

    bucket_name = box.storage_name
    async_s3_client = await _get_async_s3_client(box)

    public_name = _get_path_public_name(app_name, box, file_group_name, file_name)
    put_url = async_s3_client.presigned_put_object(bucket_name=bucket_name, object_name=object_name)

    file = WriteFileResponse(name=public_name, presigned_put_url=put_url)

    return file


async def _create_file_group(app_name: str, user_id: str, box: DataBox, file_group_name: str) -> FileGroup:
    fg = await _create_path(app_name, user_id, box, file_group_name, None)
    return fg


@app.put("/{app_name}/files/{box_name}/{location:path}",
         response_model=FileGroup | WriteFileResponse)
async def put_location_op(response: Response,
                          app_name: str,
                          box_name: str,
                          user_id: Annotated[str, Depends(get_user_id)],
                          location: str | None = None):
    file_group_name, file_name = parse_location(location)
    box_name = get_box_name(box_name, app_name, user_id)

    box = await get_box(_k8s_api_client, box_name, app_name)

    file = None
    fg = None

    if not file_group_name and not file_name:
        raise HTTPException(status_code=400, detail='Location not specified')

    if file_name:
        # if a file name is given, presigned URLs are generated; no empty file is created
        file = await _create_file_put_presigned_url(app_name, user_id, box, file_group_name, file_name)
    else:
        # if only a file group name is given, an empty directory is created
        # and its contents (most likely empty) are returned
        _ = await _create_file_group(app_name, user_id, box, file_group_name)
        fg = await _get_file_group_with_files(app_name, user_id, box, file_group_name, WriteFileResponse)

    if file:
        resp = file
    else:
        resp = fg

    return resp


@app.post("/{app_name}/files/{box_name}/{location:path}",
          response_model=FileGroup | WriteFileResponse)
@app.post("/{app_name}/files/{box_name}",
          response_model=FileGroup | WriteFileResponse)
async def post_location_op(response: Response,
                           app_name: str,
                           box_name: str,
                           user_id: Annotated[str, Depends(get_user_id)],
                           location: str | None = None):
    file_group_name, file_name = parse_location(location)
    box_name = get_box_name(box_name, app_name, user_id)

    box = await get_box(_k8s_api_client, box_name, app_name)

    if file_name:
        # POST is supported only for file groups or an empty location;
        # file_name must not be given explicitly in the location
        raise HTTPException(status_code=400, detail=f'Not supported location \'{location}\' for POST; '
                                                    f'file name must not be specified; given \'{file_name}\'')

    if not file_group_name:
        # corresponds to an empty location; an empty file and a file group
        # with generated names are created;
        file = await _create_file(app_name, user_id, box)
    else:
        # corresponds to location='/file_group/'; an empty file
        # with a generated name is created in the given file group
        file = await _create_file_inside_file_group(app_name, user_id, box, file_group_name)

    response.status_code = status.HTTP_201_CREATED
    return file


@app.get("/{app_name}/files/{box_name}/{location:path}",
         response_model=FileGroup | ReadFileResponse)
async def get_location_op(response: Response,
                          app_name: str,
                          box_name: str,
                          user_id: Annotated[str, Depends(get_user_id)],
                          location: str | None = None):
    file_group_name, file_name = parse_location(location)
    box_name = get_box_name(box_name, app_name, user_id)

    box = await get_box(_k8s_api_client, box_name, app_name)

    file = None
    fg = None

    if not file_group_name and not file_name:
        raise HTTPException(status_code=404, detail='Location not specified')

    if file_name and file_name != '*':
        file = await _get_file(app_name, user_id, box, file_group_name, file_name)
        if not file:
            raise HTTPException(status_code=404, detail=f'File \'{posixpath.join(file_group_name, file_name)}\' '
                                                        f'doesn\'t exist')

    else:
        fg = await _get_file_group_with_files(app_name, user_id, box, file_group_name, ReadFileResponse)
        if not fg:
            raise HTTPException(status_code=404, detail=f'File group \'{file_group_name}\' '
                                                        f'doesn\'t exist')

    if file:
        resp = file
    else:
        resp = fg

    return resp
7
controller/src/files/api/docker.md
Normal file
@@ -0,0 +1,7 @@
```
docker run --rm -p 8080:8080 \
    --name swagger-unip-files \
    -e SWAGGER_FILE=/api/openapi.yaml \
    -e PORT=8080 \
    -v $PWD/controller/src/files/api:/api swaggerapi/swagger-editor
```
483
controller/src/files/api/openapi-snapshot.json
Normal file
@@ -0,0 +1,483 @@
{
  "openapi": "3.0.3",
  "info": {
    "title": "ML Component - OpenAPI 3.0",
    "description": "Files service specification",
    "termsOfService": "https://platform-dev-cs-hse.objectoriented.ru/terms/",
    "contact": {
      "email": "apiteam@platform-dev-cs-hse.objectoriented.ru"
    },
    "version": "1.0.0"
  },
  "servers": [
    {
      "url": "http://localhost:8000"
    },
    {
      "url": "https://platform-dev-cs-hse.objectoriented.ru/app/files"
    }
  ],
  "tags": [
    {
      "name": "files",
      "description": "Operations on files and file groups",
      "externalDocs": {
        "description": "Learn more",
        "url": "https://platform-dev-cs-hse.objectoriented.ru"
      }
    }
  ],
  "security": [
    {
      "basic_auth": []
    }
  ],
  "paths": {
    "/{app}/files/{box}/{location}": {
      "parameters": [
        {
          "name": "X-Request-Id",
          "in": "header",
          "schema": {
            "$ref": "#/components/headers/request_id"
          },
          "description": "Identifier for request diagnostics",
          "examples": {
            "example": {
              "summary": "All-zero identifier",
              "description": "Identifier in UUID format",
              "value": "00000000-0000-0000-0000-000000000000"
            }
          }
        },
        {
          "name": "location",
          "in": "path",
          "required": true,
          "schema": {
            "type": "string"
          },
          "description": "Path to a file group or a file.\n\nIn the general case it has the form <file_group>/<file>, where\nthe <file_group> name, if given, may consist of one or more segments.\nThe <file> name, if given, may consist of only one segment.\n\nIt may take values in the following formats:\n- '' (new file group and a new file in it with generated names)\n- 'a/' (file group a/)\n- 'a' (file group a/)\n- 'a/b/' (file group a/b/)\n- 'a/b' (file b in file group a/)\n- 'a/b/c' (file c in file group a/b/)\n",
          "examples": {
            "empty": {
              "summary": "Empty value",
              "value": "",
              "description": "Used when creating a new file group\nand a new file in it. Their names are generated automatically.\n"
            },
            "file_group": {
              "summary": "File group with a simple name",
              "value": "a/",
              "description": "Corresponds to a file group whose name consists of one segment.\n"
            },
            "file_group_complex": {
              "summary": "File group with a composite name",
              "value": "a/b/",
              "description": "Corresponds to a file group whose name consists of two segments.\n"
            },
            "file": {
              "summary": "File in a file group",
              "value": "a/b",
              "description": "Corresponds to a file in a file group whose name consists of one segment.\n"
            }
          }
        },
        {
          "name": "box",
          "in": "path",
          "required": true,
          "schema": {
            "type": "string"
          },
          "description": "DataBox box name"
        },
        {
          "name": "app",
          "in": "path",
          "required": true,
          "schema": {
            "type": "string"
          },
          "description": "PlatformApp application name"
        }
      ],
      "get": {
        "tags": [
          "files"
        ],
        "summary": "Get a file or a file group",
        "description": "Returns a file or a file group.\n\nIf the location corresponds to a file group, returns the file group\nwith all files in it.\n\nIf the location corresponds to a file, returns the file.\n\nAn empty location is not supported in GET.",
        "operationId": "get_files",
        "responses": {
          "200": {
            "$ref": "#/components/responses/successful-read"
          },
          "400": {
            "description": "Invalid input.\nPossible causes: \n- An empty location was passed;\n",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "headers": {
              "X-Request-Id": {
                "$ref": "#/components/headers/request_id"
              }
            }
          },
          "401": {
            "description": "Authentication failed",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "headers": {
              "X-Request-Id": {
                "$ref": "#/components/headers/request_id"
              }
            }
          },
          "404": {
            "description": "Requested resource not found.\nPossible causes: \n- The specified location does not exist;\n",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "headers": {
              "X-Request-Id": {
                "$ref": "#/components/headers/request_id"
              }
            }
          },
          "default": {
            "$ref": "#/components/responses/successful-read"
          }
        }
      },
      "post": {
        "tags": [
          "files"
        ],
        "summary": "Create a file or a file group",
        "description": "Creates a file or a file group.\n\nThe location may be empty or correspond to a file group.\nLocations corresponding to a file are not supported in POST.\n\nIf the location corresponds to a file group, creates a new empty file\nwith a generated name in the given file group.\n\nIf the location is empty, creates a file group and a file in it\nwith generated names.",
        "operationId": "post_files",
        "responses": {
          "201": {
            "$ref": "#/components/schemas/WriteFileResponse"
          },
          "400": {
            "description": "Invalid input.\nPossible causes: \n- A location corresponding to a file was passed;\n",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "headers": {
              "X-Request-Id": {
                "$ref": "#/components/headers/request_id"
              }
            }
          },
          "401": {
            "description": "Authentication failed",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "headers": {
              "X-Request-Id": {
                "$ref": "#/components/headers/request_id"
              }
            }
          },
          "default": {
            "$ref": "#/components/schemas/WriteFileResponse"
          }
        }
      },
      "put": {
        "tags": [
          "files"
        ],
        "summary": "Put a file or a file group",
        "description": "Puts a file or a file group.\n\nIf the location corresponds to a file group, creates a new empty \nfile group with the given name.\nIf the file group already exists, does nothing.\n\nIf the location corresponds to a file, creates presigned links\nfor reading and writing the file contents. The file itself is not created.\n\nAn empty location is not supported in PUT.",
        "operationId": "put_files",
        "responses": {
          "200": {
            "$ref": "#/components/responses/successful-write"
          },
          "400": {
            "description": "Invalid input.\nPossible causes: \n- An empty location was passed;\n",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "headers": {
              "X-Request-Id": {
                "$ref": "#/components/headers/request_id"
              }
            }
          },
          "401": {
            "description": "Authentication failed",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "headers": {
              "X-Request-Id": {
                "$ref": "#/components/headers/request_id"
              }
            }
          },
          "default": {
            "$ref": "#/components/responses/successful-write"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "ReadFileResponse": {
        "description": "A file with a link for getting its contents",
        "type": "object",
        "properties": {
          "name": {
            "description": "Name",
            "type": "string"
          },
          "presigned_get_url": {
            "description": "Presigned link for getting the file contents",
            "type": "string"
          }
        },
        "required": [
          "name",
          "presigned_get_url"
        ]
      },
      "WriteFileResponse": {
        "description": "A file with a link for uploading its contents",
        "type": "object",
        "properties": {
          "name": {
            "description": "Name",
            "type": "string"
          },
          "presigned_put_url": {
            "description": "Presigned link for uploading the file contents",
            "type": "string"
          }
        },
        "required": [
          "name",
          "presigned_put_url"
        ]
      },
      "ReadFileGroupResponse": {
        "description": "A file group.",
        "type": "object",
        "properties": {
          "name": {
            "description": "Name.",
            "type": "string"
          },
          "files": {
            "description": "List of files in the file group.",
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/ReadFileResponse"
            }
          }
        },
        "required": [
          "name",
          "files"
        ]
      },
      "WriteFileGroupResponse": {
        "description": "A file group.",
        "type": "object",
        "properties": {
          "name": {
            "description": "Name.",
            "type": "string"
          },
          "files": {
            "description": "List of files in the file group.",
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/WriteFileResponse"
            }
          }
        },
        "required": [
          "name",
          "files"
        ]
      },
      "ErrorResponse": {
        "description": "Object with the result of an inference operation that did not complete successfully.",
        "type": "object",
        "properties": {
          "errors": {
            "description": "List of errors.",
            "type": "array",
            "items": {
              "description": "An error that occurred while performing the inference operation.",
              "type": "object",
              "properties": {
                "type": {
                  "description": "Error type.",
                  "type": "string"
                },
                "message": {
                  "description": "Message describing the error.",
                  "type": "string"
                }
              },
              "required": [
                "type",
                "message"
              ]
            },
            "maxItems": 10
          }
        }
      }
    },
    "headers": {
      "request_id": {
        "description": "Identifier for request diagnostics",
        "schema": {
          "type": "string",
          "format": "uuid"
        },
        "examples": {
          "example": {
            "summary": "All-zero identifier",
            "value": "00000000-0000-0000-0000-000000000000"
          }
        }
      }
    },
    "responses": {
      "successful-read": {
        "description": "Operation completed successfully",
        "content": {
          "application/json": {
            "schema": {
              "oneOf": [
                {
                  "$ref": "#/components/schemas/ReadFileResponse"
                },
                {
                  "$ref": "#/components/schemas/ReadFileGroupResponse"
                }
              ]
            },
            "examples": {
              "file": {
                "summary": "File",
                "description": "File",
                "value": {
                  "name": "file_group1/file1",
                  "presigned_get_url": "<presigned_get_url>"
                }
              },
              "file_group": {
                "summary": "File group",
                "description": "A file group with files",
                "value": {
                  "name": "file_group1",
                  "files": [
                    {
                      "name": "file1",
                      "presigned_get_url": "<presigned_get_url>"
                    },
                    {
                      "name": "file2",
                      "presigned_get_url": "<presigned_get_url>"
                    }
                  ]
                }
              },
              "empty_file_group": {
                "summary": "Empty file group",
                "description": "A file group without files",
                "value": {
                  "name": "file_group2",
                  "files": []
                }
              },
              "error": {
                "summary": "Error",
                "description": "Error",
                "value": {
                  "errors": [
                    {
                      "type": "Access denied",
                      "message": "Credentials not provided"
                    },
                    {
                      "type": "Resource not found",
                      "message": "The specified location does not exist"
                    }
                  ]
                }
              }
            }
          }
        }
      },
      "successful-write": {
        "description": "Operation completed successfully",
        "content": {
          "application/json": {
            "schema": {
              "oneOf": [
                {
                  "$ref": "#/components/schemas/WriteFileResponse"
                },
                {
                  "$ref": "#/components/schemas/WriteFileGroupResponse"
                }
              ]
            }
          }
        },
        "headers": {
          "X-Request-Id": {
            "$ref": "#/components/headers/request_id"
          }
        }
      }
    },
    "securitySchemes": {
      "basic_auth": {
        "description": "Basic authentication",
        "type": "http",
        "scheme": "basic"
      }
    }
  }
}
366
controller/src/files/api/openapi.yaml
Normal file
@@ -0,0 +1,366 @@
# ============================================================
# System: Unified Library, AI Centre, HSE University
# Module: Files
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
openapi: 3.0.3
info:
  title: ML Component - OpenAPI 3.0
  description: |-
    Files service specification
  termsOfService: https://mlops.hse.ru/
  contact:
    email: support-mlops@hse.ru
  version: 1.0.0
#externalDocs:
#  description: Find out more about Swagger
#  url: http://swagger.io
servers:
  - url: http://localhost:8000
tags:
  - name: files
    description: Operations on files and file groups
    externalDocs:
      description: Learn more
      url: https://mlops.hse.ru/
security:
  - basic_auth: []
paths:
  '/{app}/files/{box}/{location}':
    parameters:
      - name: X-Request-Id
        in: header
        schema:
          $ref: '#/components/headers/request_id'
        description: Identifier for request diagnostics
        examples:
          example:
            summary: All-zero identifier
            description: Identifier in UUID format
            value: 00000000-0000-0000-0000-000000000000
      - name: location
        in: path
        required: true
        schema:
          type: string
        description: |
          Path to a file group or a file.

          In the general case it has the form <file_group>/<file>, where
          the <file_group> name, if given, may consist of one or more segments.
          The <file> name, if given, may consist of only one segment.

          It may take values in the following formats:
          - '' (new file group and a new file in it with generated names)
          - 'a/' (file group a/)
          - 'a' (file group a/)
          - 'a/b/' (file group a/b/)
          - 'a/b' (file b in file group a/)
          - 'a/b/c' (file c in file group a/b/)
        examples:
          empty:
            summary: Empty value
            value: ''
            description: |
              Used when creating a new file group
              and a new file in it. Their names are generated automatically.
          file_group:
            summary: File group with a simple name
            value: 'a/'
            description: |
              Corresponds to a file group whose name consists of one segment.
          file_group_complex:
            summary: File group with a composite name
            value: 'a/b/'
            description: |
              Corresponds to a file group whose name consists of two segments.
          file:
            summary: File in a file group
            value: 'a/b'
            description: |
              Corresponds to a file in a file group whose name consists of one segment.
      - name: box
        in: path
        required: true
        schema:
          type: string
        description: DataBox box name
      - name: app
        in: path
        required: true
        schema:
          type: string
        description: PlatformApp application name
    get:
      tags:
        - files
      summary: Get a file or a file group
      description: |-
        Returns a file or a file group.

        If the location corresponds to a file group, returns the file group
        with all files in it.

        If the location corresponds to a file, returns the file.

        An empty location is not supported in GET.
      operationId: get_files
      responses:
        '200':
          $ref: '#/components/responses/successful-read'
        '400':
          description: |
            Invalid input.
            Possible causes:
            - An empty location was passed;
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
          headers:
            X-Request-Id:
              $ref: '#/components/headers/request_id'
        '401':
          description: Authentication failed
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
          headers:
            X-Request-Id:
              $ref: '#/components/headers/request_id'
        '404':
          description: |
            Requested resource not found.
            Possible causes:
            - The specified location does not exist;
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
          headers:
            X-Request-Id:
              $ref: '#/components/headers/request_id'
        default:
          $ref: '#/components/responses/successful-read'
    post:
      tags:
        - files
      summary: Create a file or a file group
      description: |-
        Creates a file or a file group.

        The location may be empty or correspond to a file group.
        Locations corresponding to a file are not supported in POST.

        If the location corresponds to a file group, creates a new empty file
        with a generated name in the given file group.

        If the location is empty, creates a file group and a file in it
        with generated names.
      operationId: post_files
      responses:
        '201':
          $ref: '#/components/schemas/WriteFileResponse'
        '400':
          description: |
            Invalid input.
            Possible causes:
            - A location corresponding to a file was passed;
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
          headers:
            X-Request-Id:
              $ref: '#/components/headers/request_id'
        '401':
          description: Authentication failed
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
          headers:
            X-Request-Id:
              $ref: '#/components/headers/request_id'
        default:
          $ref: '#/components/schemas/WriteFileResponse'
    put:
      tags:
        - files
      summary: Put a file or a file group
      description: |-
        Puts a file or a file group.

        If the location corresponds to a file group, creates a new empty
        file group with the given name.
        If the file group already exists, does nothing.

        If the location corresponds to a file, creates presigned links
        for reading and writing the file contents. The file itself is not created.

        An empty location is not supported in PUT.
      operationId: put_files
      responses:
        '200':
          $ref: '#/components/responses/successful-write'
        '400':
          description: |
            Invalid input.
            Possible causes:
            - An empty location was passed;
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
          headers:
            X-Request-Id:
              $ref: '#/components/headers/request_id'
        '401':
          description: Authentication failed
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
          headers:
            X-Request-Id:
              $ref: '#/components/headers/request_id'
        default:
          $ref: '#/components/responses/successful-write'
components:
  schemas:
    ReadFileResponse:
      description: A file with a link for getting its contents
      type: object
      properties:
        name:
          description: Name
          type: string
        presigned_get_url:
          description: Presigned link for getting the file contents
          type: string
      required: ["name", "presigned_get_url"]
    WriteFileResponse:
      description: A file with a link for uploading its contents
      type: object
      properties:
        name:
          description: Name
          type: string
        presigned_put_url:
          description: Presigned link for uploading the file contents
          type: string
      required: ["name", "presigned_put_url"]
    ReadFileGroupResponse:
      description: A file group.
      type: object
      properties:
        name:
          description: Name.
          type: string
        files:
          description: List of files in the file group.
          type: array
          items:
            $ref: '#/components/schemas/ReadFileResponse'
      required: ["name", "files"]
    WriteFileGroupResponse:
      description: A file group.
      type: object
      properties:
        name:
          description: Name.
          type: string
        files:
          description: List of files in the file group.
          type: array
          items:
            $ref: '#/components/schemas/WriteFileResponse'
      required: ["name", "files"]
    ErrorResponse:
      description: Object with the result of an inference operation that did not complete successfully.
      type: object
      properties:
        errors:
          description: List of errors.
          type: array
          items:
            description: An error that occurred while performing the inference operation.
            type: object
            properties:
              type:
                description: Error type.
                type: string
              message:
                description: Message describing the error.
                type: string
            required: ["type", "message"]
          maxItems: 10
  headers:
    request_id:
      description: Identifier for request diagnostics
      schema:
        type: string
        format: uuid
      examples:
        example:
          summary: All-zero identifier
          value: 00000000-0000-0000-0000-000000000000
  responses:
    successful-read:
      description: Operation completed successfully
      content:
        application/json:
          schema:
            oneOf:
              - $ref: '#/components/schemas/ReadFileResponse'
              - $ref: '#/components/schemas/ReadFileGroupResponse'
          examples:
            file:
              summary: File
              description: File
              value:
                name: file_group1/file1
                presigned_get_url: "<presigned_get_url>"
            file_group:
              summary: File group
              description: A file group with files
              value:
                name: file_group1
                files:
                  - name: file1
                    presigned_get_url: "<presigned_get_url>"
                  - name: file2
                    presigned_get_url: "<presigned_get_url>"
            empty_file_group:
              summary: Empty file group
              description: A file group without files
              value:
                name: file_group2
                files: []
            error:
              summary: Error
              description: Error
              value:
                errors:
                  - type: Access denied
                    message: Credentials not provided
                  - type: Resource not found
                    message: The specified location does not exist
    successful-write:
      description: Operation completed successfully
      content:
        application/json:
          schema:
            oneOf:
              - $ref: '#/components/schemas/WriteFileResponse'
              - $ref: '#/components/schemas/WriteFileGroupResponse'
      headers:
        X-Request-Id:
          $ref: '#/components/headers/request_id'
  securitySchemes:
    basic_auth:
      description: Basic authentication
      type: http
      scheme: basic
144
controller/src/files/box.py
Normal file
@@ -0,0 +1,144 @@
# ============================================================
# System: Unified Library, AI Centre, HSE University
# Module: Files
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
import base64
from typing import Optional
from uuid import UUID

from fastapi import HTTPException
from kubernetes_asyncio.client import CustomObjectsApi, ApiException, CoreV1Api, V1Secret, ApiClient
from pydantic import BaseModel

from files.config import UNIP_BOXES_S3_SECRET_NAME, UNIP_BOXES_S3_DEFAULT_USER_DATA_BUCKET, \
    UNIP_BOXES_S3_DEFAULT_HOST, UNIP_BOXES_S3_DEFAULT_REGION
from files.exceptions import UnipFilesException

APP_USER_DATA_DEFAULT_BOX_SUFFIX = '-user-data'
UNIP_CONTROLLER_NAMESPACE = 'unip-system-controller'


class DataBox(BaseModel):
    namespace: str
    name: str
    uid: UUID
    creds_secret_name: str
    creds_secret_namespace: str
    storage_sub_path: Optional[str]
    storage_host: str
    storage_name: str


def get_box_name(passed_box_name: str, app_name: str, user_id: str) -> str:
    """
    Determines the name of the box to use depending on the passed parameter values.

    :param passed_box_name: Box name passed in the request.
    :param app_name: PlatformApp application name.
    :param user_id: User identifier.
    :return: :class:`str <str>` Name of the box to use.
    """
    if passed_box_name and passed_box_name != '_':
        return passed_box_name
    return app_name + APP_USER_DATA_DEFAULT_BOX_SUFFIX
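# For example (hypothetical values): get_box_name('my-box', 'demo-app', 'u1') returns
# 'my-box', while the placeholder '_' or an empty name falls back to the app's default
# user-data box: get_box_name('_', 'demo-app', 'u1') == 'demo-app-user-data'.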

async def _get_s3_box_creds(api: ApiClient, box: DataBox) -> tuple[str, str, str, str]:
    core_v1_api = CoreV1Api(api)
    try:
        secret: V1Secret = await core_v1_api.read_namespaced_secret(name=box.creds_secret_name,
                                                                    namespace=box.creds_secret_namespace)
        secret_body = secret.to_dict()
    except ApiException as exc:
        if exc.status == 404:
            raise UnipFilesException(f'S3 credentials secret "{box.creds_secret_name}" not found')
        raise exc

    if 'data' not in secret_body:
        raise UnipFilesException(f'data not set in secret {box.creds_secret_name}')
    data = secret_body['data']
    if not (req_fields := {'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'}) <= data.keys():
        raise UnipFilesException(f'some of {req_fields} not set in secret {box.creds_secret_name}')

    access_key = base64.b64decode(data['AWS_ACCESS_KEY_ID']).decode('utf-8')
    secret_key = base64.b64decode(data['AWS_SECRET_ACCESS_KEY']).decode('utf-8')
    endpoint = box.storage_host
    if 'AWS_REGION' in data:
        region = base64.b64decode(data['AWS_REGION']).decode('utf-8')
    else:
        region = UNIP_BOXES_S3_DEFAULT_REGION
    return endpoint, access_key, secret_key, region


async def _get_box_resource(co_api: CustomObjectsApi, box_name, namespace):
    try:
        # the "regular" method
        # co_api.get_namespaced_custom_object("unified-platform.cs.hse.ru", "v1", namespace, "databoxes", box_name)
        # is not asynchronous for unknown reasons, so the "_with_http_info" method is called instead
        s3_box_res, _, _ = \
            await co_api.get_namespaced_custom_object_with_http_info("unified-platform.cs.hse.ru",
                                                                     "v1",
                                                                     namespace,
                                                                     "databoxes",
                                                                     box_name)
    except ApiException as exc:
        if exc.status == 404:
            raise HTTPException(status_code=404, detail=f'Box \'{box_name}\' does not exist')
        raise exc

    return s3_box_res


async def get_box(api: ApiClient, box_name, app_name) -> DataBox:
    """
    Returns a data box.

    :param api: Asynchronous Kubernetes client.
    :param box_name: Box name passed in the request.
    :param app_name: PlatformApp application name.
    :return: :class:`DataBox <DataBox>` Data box.
    """
    co_api = CustomObjectsApi(api)
    box_res = await _get_box_resource(co_api, box_name, app_name)
    box_spec = box_res['spec']

    if 's3Storage' not in box_spec \
            and 's3DefaultStorage' not in box_spec \
            and 'datasetReference' not in box_spec:
        raise UnipFilesException(f's3Storage, s3DefaultStorage or datasetReference inside Box {box_name} is expected, '
                                 f'but not present')
    if 's3Storage' in box_spec:
        s3_storage = box_spec['s3Storage']
        if 'awsVarsS3Credentials' not in s3_storage:
            raise UnipFilesException(f'awsVarsS3Credentials inside s3Storage of Box {box_name} is expected, '
                                     f'but not present')

        creds_secret_name = s3_storage['awsVarsS3Credentials']
        storage_host = s3_storage['host']
        bucket = s3_storage['bucket']
        storage_name = bucket['name']
        namespace = creds_secret_namespace = app_name
        if 'subPath' in bucket:
            storage_sub_path = bucket['subPath']
        else:
            storage_sub_path = None
    elif 's3DefaultStorage' in box_spec:
        creds_secret_name = UNIP_BOXES_S3_SECRET_NAME
        creds_secret_namespace = UNIP_CONTROLLER_NAMESPACE
        namespace = app_name
        storage_sub_path = app_name + '/' + box_name
        storage_name = UNIP_BOXES_S3_DEFAULT_USER_DATA_BUCKET
        storage_host = UNIP_BOXES_S3_DEFAULT_HOST
    elif 'datasetReference' in box_spec:
        raise HTTPException(status_code=404, detail=f'Box \'{box_name}\' does not exist')
    else:
        raise UnipFilesException(f's3Storage, s3DefaultStorage or datasetReference inside Box {box_name} is expected, '
                                 f'but not present')

    box_uid = UUID(box_res['metadata']['uid'])

    return DataBox(name=box_name, uid=box_uid, creds_secret_name=creds_secret_name,
                   creds_secret_namespace=creds_secret_namespace,
                   namespace=namespace, storage_sub_path=storage_sub_path, storage_name=storage_name,
                   storage_host=storage_host)
14
controller/src/files/config.py
Normal file
@@ -0,0 +1,14 @@
# ============================================================
# System: Unified Library, AI Centre, HSE University
# Module: Files
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
import os

UNIP_FILES_RUN_MODE = os.getenv('UNIP_FILES_RUN_MODE')

UNIP_BOXES_S3_DEFAULT_USER_DATA_BUCKET = os.getenv('UNIP_BOXES_S3_DEFAULT_USER_DATA_BUCKET')
UNIP_BOXES_S3_SECRET_NAME = os.getenv('UNIP_BOXES_S3_SECRET_NAME')
UNIP_BOXES_S3_DEFAULT_HOST = os.getenv('UNIP_BOXES_S3_DEFAULT_HOST')
UNIP_BOXES_S3_DEFAULT_REGION = os.getenv('UNIP_BOXES_S3_DEFAULT_REGION')  # e.g. "ru-central1"
9
controller/src/files/exceptions.py
Normal file
@@ -0,0 +1,9 @@
# ============================================================
# System: Unified Library, AI Centre, HSE University
# Module: Files
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================

class UnipFilesException(Exception):
    pass
32
controller/src/files/logging.py
Normal file
@@ -0,0 +1,32 @@
# ============================================================
# System: Unified Library, AI Centre, HSE University
# Module: Files
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
from logging.config import dictConfig


def configure_logging():
    dictConfig({
        'version': 1,
        'disable_existing_loggers': False,
        'formatters': {
            'default': {
                'format': '[%(asctime)s] [%(levelname)s] [%(name)s]: %(message)s',
            }
        },
        'handlers': {
            'default': {
                'class': 'logging.StreamHandler',
                'stream': 'ext://sys.stdout',
                'formatter': 'default'
            }
        },
        'loggers': {
            'root': {
                'level': 'INFO',
                'handlers': ['default']
            }
        }
    })
31
controller/src/files/s3/aws_signing_libs.md
Normal file
@@ -0,0 +1,31 @@
# aws-request-signer

The library used.

https://github.com/iksteen/aws-request-signer

Well-structured code, no dependencies on HTTP libraries.

https://github.com/iksteen/aws-request-signer/blob/master/aws_request_signer/__init__.py

# Minio

Synchronous implementation only. An example of an interface and implementation:

minio/api.py

# fastaws

Depends on an HTTP library; an example implementation.

https://github.com/waydegg/fastaws/blob/master/src/fastaws/core.py
https://github.com/waydegg/fastaws/blob/master/src/fastaws/s3/client.py

# requests-auth-aws-sigv4

https://github.com/andrewjroth/requests-auth-aws-sigv4

Depends on HTTP libraries; the code is poorly structured.

https://github.com/iksteen/aws-request-signer
https://github.com/iksteen/aws-request-signer/blob/master/aws_request_signer/__init__.py
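Because aws-request-signer does not depend on an HTTP library, the caller signs a request and then sends it with any client. A minimal sketch of the same usage pattern as `AsyncS3Client` below (the endpoint and credentials are placeholder values):

```python
import httpx
from aws_request_signer import AwsRequestSigner

# Placeholder credentials; real values come from the box credentials secret.
signer = AwsRequestSigner(access_key_id='AKIA...', secret_access_key='...',
                          region='ru-central1', service='s3')

url = 'https://storage.example.com/my-bucket?prefix=users/'
headers = signer.sign_with_headers(method='GET', url=url)  # SigV4 headers to attach

response = httpx.get(url, headers=headers)  # any HTTP client can send the signed request
```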
271
controller/src/files/s3/client.py
Normal file
@@ -0,0 +1,271 @@
# ============================================================
# System: Unified Library, AI Centre, HSE University
# Module: Files
# Authors: Polezhaev V.A., Khritankov A.S.
# Created: 2024
# ============================================================
import hashlib
import json
import logging
import posixpath
import urllib.parse
from typing import Optional, List, Union

import httpx
from aws_request_signer import AwsRequestSigner, UNSIGNED_PAYLOAD
from lxml import etree
from lxml.etree import Element

XML_NAMESPACES = {'s3': 'http://s3.amazonaws.com/doc/2006-03-01/'}


class S3Object:

    def __init__(self, bucket_name: str, object_name: Optional[str]):
        self.__bucket_name: str = bucket_name
        self.__object_name: Optional[str] = object_name

    @property
    def bucket_name(self) -> str:
        return self.__bucket_name

    @property
    def object_name(self) -> Optional[str]:
        return self.__object_name

    @property
    def is_dir(self) -> bool:
        return self.__object_name is not None and self.__object_name.endswith("/")


class AsyncS3Client:
    """
    Asynchronous client for operations on S3 objects.

    Uses an HTTP client whose lifecycle is managed outside
    of AsyncS3Client.
    This may change in the future.

    :param http_client: Asynchronous HTTP client.
    :param endpoint: Host name of the S3 service.
    :param access_key: Access key.
    :param secret_key: Secret key.
    :param region: Region.
    """

    def __init__(self,
                 http_client: httpx.AsyncClient,
                 endpoint: str,
                 access_key: str,
                 secret_key: str,
                 region: str,
                 logger=None):
        if not endpoint.startswith('http'):
            endpoint = 'https://' + endpoint

        self.logger = logger if logger else logging.getLogger(__name__)

        endpoint = endpoint.rstrip('/')
        self.__endpoint = endpoint
        self.__access_key = access_key
        self.__secret_key = secret_key
        self.__region = region
        self.__signer = AwsRequestSigner(access_key_id=access_key,
                                         secret_access_key=secret_key,
                                         region=region,
                                         service='s3')
        self.__http_client = http_client

    async def list_objects(self, bucket_name: str, prefix: Optional[str] = None, recursive: bool = False,
                           max_keys: Optional[int] = None) -> List[S3Object]:
        """
        Returns the list of S3 objects produced by a GET request to the URL
        {endpoint}/{bucket_name}?prefix={prefix}

        If the request result is empty, an empty list is returned.

        Does not normalize the prefix, i.e. the prefixes 'obj' and 'obj/' will
        produce different results.

        :param bucket_name: Bucket name.
        :param prefix: Value of the prefix query parameter.
        :param recursive: If False, adds the delimiter query parameter with the value '/'.
        :param max_keys: Value of the max-keys query parameter.
        :return: :class:`List[S3Object] <List[S3Object]>` the list of S3 objects.
        :raises:
            httpx.HTTPStatusError: if the response code differs from 200.
        """
        delimiter = None if recursive else "/"
        query_params = {}
        if prefix:
            query_params["prefix"] = prefix
        if max_keys:
            query_params["max-keys"] = max_keys
        if delimiter:
            query_params["delimiter"] = delimiter

        bucket_name = posixpath.normpath(bucket_name)

        query_string_items = []
        if query_params:
            for key, value in query_params.items():
                if isinstance(value, str):
                    value_qs = urllib.parse.quote(value, safe="")
                else:
                    value_qs = value
                param_qs = f"{key}={value_qs}"
                query_string_items.append(param_qs)
        query_string = '&'.join(query_string_items)

        url = f'{self.__endpoint}/{bucket_name}?{query_string}'

        method = 'GET'
        headers = self.__signer.sign_with_headers(method=method, url=url)

        response = await self.__http_client.request(method=method,
                                                    url=url,
                                                    headers=headers)

        if response.status_code != 200:
            response.raise_for_status()

        root: Element = etree.XML(response.content)

        s3_objects = []
        content_elements = root.findall(".//s3:Contents", XML_NAMESPACES)
        for ce in content_elements:
            key = ce.find('s3:Key', XML_NAMESPACES).text
            s3_object = S3Object(bucket_name=bucket_name,
                                 object_name=key)
            s3_objects.append(s3_object)

        return s3_objects
|
||||
|
||||
async def head_object(self, bucket_name: str, object_name: str) -> S3Object:
|
||||
"""
|
||||
Выполняет запрос HEAD c URL
|
||||
{endpoint}/{bucket_name}/{object_name}
|
||||
|
||||
Если объект существует, возвращает его.
|
||||
|
||||
Если объект не существует, то выбрасывается исключение httpx.HTTPStatusError
|
||||
с кодом 404.
|
||||
|
||||
Не выполняет нормализацию object_name, то есть имена объектов 'obj' и 'obj/' приведут
|
||||
к различным результатам.
|
||||
|
||||
:param bucket_name: Имя бакета.
|
||||
:param object_name: Имя (ключ) объекта..
|
||||
:return: :class:`S3Object <S3Object>` S3 объект.
|
||||
:raises:
|
||||
httpx.HTTPStatusError: если код ответа отличается от 200.
|
||||
"""
|
||||
|
||||
url = f'{self.__endpoint}/{bucket_name}/{object_name}'
|
||||
|
||||
method = 'HEAD'
|
||||
headers = self.__signer.sign_with_headers(method=method, url=url)
|
||||
bucket_name = posixpath.normpath(bucket_name)
|
||||
|
||||
response = await self.__http_client.request(method=method,
|
||||
url=url,
|
||||
headers=headers)
|
||||
|
||||
if response.status_code != 200:
|
||||
response.raise_for_status()
|
||||
|
||||
return S3Object(bucket_name=bucket_name, object_name=object_name)
|
||||
|
||||
async def put_object(self, bucket_name: str, object_name: str, data: Optional[Union[dict, str, bytes]] = None):
|
||||
"""
|
||||
Выполняет запрос PUT запрос с URL {endpoint}/{bucket_name}/{object_name},
|
||||
сохраняет переданные данные как содержимое объекта.
|
||||
|
||||
Не выполняет нормализацию object_name, то есть имена объектов 'obj' и 'obj/' приведут
|
||||
к различным результатам.
|
||||
|
||||
Если данные data не переданы, то в теле запроса будет передано пустое содержимое.
|
||||
|
||||
Если данные data переданы типа bytes, то они передаются в теле запроса без изменений.
|
||||
|
||||
Если данные data переданы типа str, то они кодируются (переводятся в bytes), используя
|
||||
кодировку 'utf-8'.
|
||||
|
||||
Если данные data переданы типа dict, то они сначала сериализуются в строку, используя
|
||||
json.dumps(data).
|
||||
|
||||
:param bucket_name: Имя бакета.
|
||||
:param object_name: Имя (ключ) объекта.
|
||||
:param data: Содержимое объекта.
|
||||
:return: None.
|
||||
:raises:
|
||||
httpx.HTTPStatusError: если код ответа отличается от 200.
|
||||
"""
|
||||
content = b""
|
||||
if data is not None:
|
||||
if isinstance(data, dict):
|
||||
content = json.dumps(data).encode('utf-8')
|
||||
elif isinstance(data, bytes):
|
||||
content = data
|
||||
elif isinstance(data, str):
|
||||
content = data.encode('utf-8')
|
||||
|
||||
bucket_name = posixpath.normpath(bucket_name)
|
||||
url = f'{self.__endpoint}/{bucket_name}/{object_name}'
|
||||
method = 'PUT'
|
||||
|
||||
content_hash = hashlib.sha256(content).hexdigest()
|
||||
|
||||
headers = self.__signer.sign_with_headers(method=method, url=url, content_hash=content_hash)
|
||||
self.logger.info(f'put object {url}')
|
||||
response = await self.__http_client.request(method=method,
|
||||
url=url,
|
||||
headers=headers,
|
||||
content=content)
|
||||
|
||||
if response.status_code != 200:
|
||||
response.raise_for_status()
|
||||
|
||||
def presigned_get_object(self, bucket_name: str, object_name: str, expires: int = 86400):
|
||||
"""
|
||||
Генерирует подписанную ссылку для получения содержимого объекта по URL
|
||||
{endpoint}/{bucket_name}/{object_name}
|
||||
|
||||
Не выполняет нормализацию object_name, то есть имена объектов 'obj' и 'obj/' приведут
|
||||
к различным результатам.
|
||||
|
||||
Не выполняет обращения к удаленному хранилищу.
|
||||
|
||||
Не выполняет подписывание содержимого.
|
||||
|
||||
:param bucket_name: Имя бакета.
|
||||
:param object_name: Имя (ключ) объекта.
|
||||
:param expires: Время жизни URL в секундах. По умолчанию равно 24 часам.
|
||||
:return: :class:`str <str>` Подписанная ссылка.
|
||||
"""
|
||||
bucket_name = posixpath.normpath(bucket_name)
|
||||
url = f'{self.__endpoint}/{bucket_name}/{object_name}'
|
||||
method = 'GET'
|
||||
return self.__signer.presign_url(method=method, url=url, content_hash=UNSIGNED_PAYLOAD, expires=expires)
|
||||
|
||||
def presigned_put_object(self, bucket_name: str, object_name: str, expires: int = 86400):
|
||||
"""
|
||||
Генерирует подписанную ссылку для загрузки содержимого объекта по URL
|
||||
{endpoint}/{bucket_name}/{object_name}
|
||||
|
||||
Не выполняет нормализацию object_name, то есть имена объектов 'obj' и 'obj/' приведут
|
||||
к различным результатам.
|
||||
|
||||
Не выполняет обращения к удаленному хранилищу.
|
||||
|
||||
Не выполняет подписывание содержимого.
|
||||
|
||||
:param bucket_name: Имя бакета.
|
||||
:param object_name: Имя (ключ) объекта.
|
||||
:param expires: Время жизни URL в секундах. По умолчанию равно 24 часам.
|
||||
:return: :class:`str <str>` Подписанная ссылка.
|
||||
"""
|
||||
bucket_name = posixpath.normpath(bucket_name)
|
||||
url = f'{self.__endpoint}/{bucket_name}/{object_name}'
|
||||
method = 'PUT'
|
||||
return self.__signer.presign_url(method=method, url=url, content_hash=UNSIGNED_PAYLOAD, expires=expires)
|
||||
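A minimal usage sketch for this client (not part of the commit): the endpoint, bucket and key below are placeholders, and the caller owns the httpx.AsyncClient lifecycle, as the docstring above states.

```
import asyncio

import httpx

# assuming AsyncS3Client from controller/src/files/s3/client.py is importable


async def main():
    async with httpx.AsyncClient() as http_client:
        client = AsyncS3Client(http_client=http_client,
                               endpoint='storage.example.net',  # hypothetical host
                               access_key='ACCESS', secret_key='SECRET',
                               region='ru-central1')
        # Upload a small JSON document, then list the "directory" it lives in.
        await client.put_object('bucket', 'path/doc.json', data={'hello': 'world'})
        for obj in await client.list_objects('bucket', prefix='path/'):
            print(obj.object_name, obj.is_dir)

asyncio.run(main())
```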
172
controller/src/files/s3/s3_url_notes.md
Normal file
@ -0,0 +1,172 @@
---
**System**: Unified Library, AI Center, HSE University

**Module**: Files

**Authors**: Polezhaev V.A., Khritankov A.S.

**Date created**: 2024

---

# Directories (folders)

There is no notion of a directory, but name prefixes (the Key) of existing objects can be treated as directories.

## Fetching directories and their contents

The contents of a "directory" can only be fetched with a listObjects (or listObjectsV2)
request whose prefix matches the directory name.

The depth of the returned results (recursion) is controlled by the delimiter parameter.
The rule can be stated as:
`delimiter = '/' if not recursive else None`

The prefix must also end with '/'.

If you address a directory without prefix (that is, as an object),
then, when the directory exists (as the prefix of at least one object,
or as an empty directory, see below), 200 and an empty body are returned.
If the directory does not exist (no objects with the matching prefix, no empty directory),
404 is returned.

Examples:

Contents of directory fg3 (the list of objects in the directory):
```
GET https://storage.hosting.net/bucket?prefix=path/fg3/&delimiter=/
```
When objects are present, the directory itself is not represented as an object in the result list.

Returns an empty result (an empty list) if there are no objects with fg3 as a name prefix,
otherwise returns them:
```
GET https://storage.hosting.net/bucket?prefix=path/fg3&delimiter=/
```

If directory fg3 does not exist, an empty result (an empty list) is returned with code 200:
```
GET https://storage.hosting.net/bucket?prefix=path/fg3/&delimiter=/
```

Returns 200 and an empty body if the directory exists (as a prefix of existing objects,
or as an empty directory), 404 otherwise:
```
GET https://storage.hosting.net/bucket/path/fg3/
```

HEAD can be used in the last example.
Note that a request of this kind can be used to check for the existence of both a directory
and a file at the same time.

## Creating a directory

A directory can be created implicitly, by creating an object whose name has the "directory"
name as a prefix.

Or explicitly, as an empty directory, see below.

## Empty directories

There is no notion of a directory, but there are "empty" directories.

These are objects:
1) whose name (the Key) ends with '/', and at the same time
2) whose name is not a prefix of any other object

### Creation

An "empty" directory can be created if you:
1) perform a PUT with a name (key) that ends with '/',
2) the key is not a prefix of other existing objects (other empty directories do not count),
3) pass an empty body (optional: if the name ends with '/', an empty
directory is created regardless of the body)

```
PUT https://storage.hosting.net/bucket/path/fg4/
```

### Fetching

An "empty" directory can be fetched as an object
(just like "regular" directories, see above, using GET with prefix
or GET without prefix).

The contents of the empty directory fg4 is a single object fg4/ with size 0 (a one-element list):
```
GET https://storage.hosting.net/bucket?prefix=path/fg4/&delimiter=/
```

An empty result (an empty list) if there are no objects with the fg4 prefix:
```
GET https://storage.hosting.net/bucket?prefix=path/fg4&delimiter=/
```

Returns 200 and an empty body if the directory exists, 404 otherwise:
```
GET https://storage.hosting.net/bucket/path/fg4/
```

### Behavior

If a "real" object is added to an "empty" directory, the directory disappears as an object from the query result.
Only the added object is returned.

If a nested "empty" directory is added to an "empty" directory, the directory stays in the query result.
The created nested "empty" directory is added to the query result as well.

If, while the "empty" directory fg4/ exists, the file fg4 is added, they will coexist
(as an "empty" directory and an object):

```
PUT https://storage.hosting.net/bucket/path/fg4/
PUT https://storage.hosting.net/bucket/path/fg4
```

# Objects

Object names (Keys) must not end with '/'. An object can then be created:

```
PUT https://storage.hosting.net/bucket/path/f5
```

An empty file can be created by sending no body with the request.

An object can be put again, replacing the existing one (PUT semantics).

If, when creating an object, its name ends with '/', an empty directory is created
regardless of the body.
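The same semantics can be exercised through the AsyncS3Client above; a hedged sketch (bucket and paths are placeholders):

```
# inside an async function, with `client` an AsyncS3Client (see client.py)
await client.put_object('bucket', 'path/f5', data='payload')      # creates/replaces an object
await client.put_object('bucket', 'path/fg4/')                    # creates an "empty" directory
print((await client.head_object('bucket', 'path/fg4/')).is_dir)   # True
```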

# Example test request

```
import hashlib

import requests
from aws_request_signer import AwsRequestSigner

AWS_REGION = "ru-central1"
AWS_ACCESS_KEY_ID = ""
AWS_SECRET_ACCESS_KEY = ""

content = b"Hello, World!\n"  # can be empty
content_hash = hashlib.sha256(content).hexdigest()

request_signer = AwsRequestSigner(
    AWS_REGION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, "s3"
)

URL = "https://storage.host.net/bucket/path/fg/"
# URL = "https://storage.host.net/bucket?prefix=path/fg/&delimiter=/"

headers = request_signer.sign_with_headers("GET", URL)
# for a method with a body
# headers = request_signer.sign_with_headers("PUT", URL, content_hash=content_hash)

r = requests.get(URL, headers=headers)
# r = requests.put(URL, headers=headers, data=content)

print(r.text)
print(r.status_code)
```
38
controller/src/kopf_k8s_client.py
Normal file
@ -0,0 +1,38 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: Utilities
# Authors: Polezhaev V.A., Khritankov A.S.
# Date created: 2024
# ============================================================
import kopf
import kubernetes
import kubernetes_asyncio

from kube_config import unip_load_kube_config, async_unip_load_kube_config

api_client: kubernetes.client.ApiClient | None = None
async_api_client: kubernetes_asyncio.client.ApiClient | None = None


@kopf.on.startup(id='setup-k8s-client')
def setup_k8s_api_client(logger, **kwargs):
    unip_load_kube_config(logger)
    global api_client
    api_client = kubernetes.client.ApiClient()


@kopf.on.cleanup(id='teardown-k8s-client')
def tear_down_k8s_api_client(logger, **kwargs):
    api_client.close()


@kopf.on.startup(id='setup-async-k8s-client')
async def setup_async_k8s_api_client(logger, **kwargs):
    await async_unip_load_kube_config(logger)
    global async_api_client
    async_api_client = kubernetes_asyncio.client.ApiClient()


@kopf.on.cleanup(id='teardown-async-k8s-client')
async def tear_down_async_k8s_api_client(logger, **kwargs):
    await async_api_client.close()
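A usage note (my observation, not stated in the commit): modules that do `from kopf_k8s_client import api_client` at import time copy the initial None binding, before the startup handler rebinds the module global; the kubernetes API classes treat None as "construct a default ApiClient", which picks up the configuration loaded by unip_load_kube_config. A sketch:

```
from kubernetes.client import AppsV1Api

from kopf_k8s_client import api_client

# AppsV1Api(None) falls back to a default ApiClient that uses the global
# configuration loaded by unip_load_kube_config() at kopf startup.
apps_v1_api = AppsV1Api(api_client)
```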
40
controller/src/kube_config.py
Normal file
@ -0,0 +1,40 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: Utilities
# Authors: Polezhaev V.A., Khritankov A.S.
# Date created: 2024
# ============================================================
import kubernetes
import kubernetes_asyncio


def unip_load_kube_config(logger):
    logger.info("Sync load kubeconfig")
    try:
        kubernetes.config.load_incluster_config()
        logger.info("Client is configured in cluster with service account.")
    except kubernetes.config.ConfigException as exc1:
        logger.debug('Cannot configure client in-cluster', exc_info=exc1)
        try:
            kubernetes.config.load_kube_config()
            logger.info("Client is configured via kubeconfig file.")
        except kubernetes.config.ConfigException as exc2:
            logger.debug('Cannot configure client via kubeconfig', exc_info=exc2)
            raise Exception("Cannot authenticate the client library "
                            "either in-cluster or via kubeconfig.")


async def async_unip_load_kube_config(logger):
    logger.info("Async load kubeconfig")
    try:
        kubernetes_asyncio.config.load_incluster_config()
        logger.info("Async client is configured in cluster with service account.")
    except kubernetes_asyncio.config.ConfigException as exc1:
        logger.debug('Cannot configure async client in-cluster', exc_info=exc1)
        try:
            await kubernetes_asyncio.config.load_kube_config()
            logger.info("Async client is configured via kubeconfig file.")
        except kubernetes_asyncio.config.ConfigException as exc2:
            logger.debug('Cannot configure async client via kubeconfig', exc_info=exc2)
            raise Exception("Cannot authenticate the async client library "
                            "either in-cluster or via kubeconfig.")
64
controller/src/location.py
Normal file
@ -0,0 +1,64 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: Utilities
# Authors: Polezhaev V.A., Khritankov A.S.
# Date created: 2024
# ============================================================
import urllib.parse
import posixpath


def parse_location(location: str) -> tuple[str | None, str | None]:
    """
    Parses a location string to extract the file group name and the file name.

    If the location is empty ('', '/', None), then file group = None, file = None.

    If the location consists of a single segment ('fg', 'fg/'), then file group = fg, file = None.

    If the location consists of any number of segments but ends with '/'
    ('f/g/', 'f/g/h/'), then file group = the whole location, file = None.

    If the location consists of more than one segment and does not end with '/'
    ('fg/f1', 'f/g/f2'), then file group = all segments except the last,
    file = the last segment.

    The returned file group and file names never start with the character '/'
    and never end with the character '/'.

    >>> parse_location('/')
    (None, None)

    >>> parse_location('/a')
    ('a', None)

    >>> parse_location('a')
    ('a', None)

    >>> parse_location('a/b/')
    ('a/b', None)

    >>> parse_location('a/b')
    ('a', 'b')
    """
    file_group_name = None
    file_name = None

    if location in {'/', '', None}:
        return file_group_name, file_name

    loc_unquoted = urllib.parse.unquote(location)
    loc_norm = posixpath.normpath(loc_unquoted)
    if loc_norm.startswith('/'):
        loc_norm = loc_norm[1:]
    left_part, right_segment = posixpath.split(loc_norm)
    if left_part == '':
        # single segment: it names the file group
        file_group_name = right_segment
    elif loc_unquoted.endswith(posixpath.sep):
        # trailing '/': the whole normalized location is the file group
        file_group_name = loc_norm
    else:
        file_group_name = left_part
        file_name = right_segment

    return file_group_name, file_name
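One behavior worth highlighting (my examples, not in the commit): locations are URL-unquoted and normpath-normalized before splitting, so percent-encoded segments are decoded and '..' components are collapsed first:

```
>>> parse_location('a%20b/c')   # '%20' is decoded before splitting
('a b', 'c')
>>> parse_location('a/b/../c')  # normpath collapses '..'
('a', 'c')
```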
26
controller/src/main.py
Normal file
@ -0,0 +1,26 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: Utilities
# Authors: Polezhaev V.A., Khritankov A.S.
# Date created: 2024
# ============================================================
# noinspection PyUnresolvedReferences
import config
# noinspection PyUnresolvedReferences
import platform_user.handlers
# noinspection PyUnresolvedReferences
import mlcmp.handlers
# noinspection PyUnresolvedReferences
import platform_app.handlers
# noinspection PyUnresolvedReferences
import box.handlers
# noinspection PyUnresolvedReferences
import exp_pipeline.handlers
# noinspection PyUnresolvedReferences
import apicmp.handlers
# noinspection PyUnresolvedReferences
import cmplink.handlers
# noinspection PyUnresolvedReferences
import repository.handlers
# noinspection PyUnresolvedReferences
import datasetcmp.handlers
0
controller/src/mlcmp/__init__.py
Normal file
247
controller/src/mlcmp/connected_boxes.py
Normal file
@ -0,0 +1,247 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: MLComponent
# Authors: Polezhaev V.A., Khritankov A.S.
# Date created: 2024
# ============================================================
import base64
import os
from dataclasses import dataclass

from kubernetes.client import CoreV1Api, ApiException, V1Secret, CustomObjectsApi, ApiClient

from exceptions import InputValidationPermanentError, ObjectDoesNotExistTemporaryError, \
    RelatedObjectNotReadyTemporaryError


# todo: MLComponentEnvVar could be moved to handlers, removing the dependency of connected_boxes on MLComponentEnvVar
@dataclass
class MLComponentEnvVar:
    name: str
    value: str


def _check_pvc_exists(core_v1_api: CoreV1Api, namespace, name, logger):
    try:
        core_v1_api.read_namespaced_persistent_volume_claim(name, namespace)
    except ApiException as exc:
        if exc.status == 404:
            raise ObjectDoesNotExistTemporaryError(f"PVC {name} in namespace {namespace} doesn't exist")
        raise exc
    logger.info(f"PVC {name} in namespace {namespace} exists, ok")


def _get_csi_s3_pvcs(core_v1_api: CoreV1Api, logger, namespace,
                     connected_box_name, mount_path, sub_path, pvc_name):
    _check_pvc_exists(core_v1_api, namespace, pvc_name, logger)

    # todo: validate connected box name (earlier?)
    volume_in_spec_name = connected_box_name.replace('_', '-')

    volume_spec_to_use = {'volume_in_spec_name': volume_in_spec_name,
                          'mount_path': mount_path,
                          'sub_path': sub_path,
                          'pvc_name': pvc_name}

    return volume_spec_to_use


def _get_s3_box(co_api: CustomObjectsApi, s3_box_name, namespace, expected_box_types=None):
    try:
        s3_box_res = co_api.get_namespaced_custom_object("unified-platform.cs.hse.ru", "v1", namespace,
                                                         "databoxes", s3_box_name)
    except ApiException as exc:
        if exc.status == 404:
            raise InputValidationPermanentError(f'Box {s3_box_name} does not exist, but required')
        raise exc

    if not expected_box_types:
        expected_box_types = {'s3Storage', 's3DefaultStorage'}

    if not any(et in s3_box_res['spec'] for et in expected_box_types):
        raise InputValidationPermanentError(f'One of {expected_box_types} is expected, but not present '
                                            f'in repository box {s3_box_name}')
    if 's3Storage' not in s3_box_res['status']:
        raise InputValidationPermanentError(f's3Storage {s3_box_name} status is expected, but not present')

    s3_box_status_conditions = s3_box_res['status']['conditions']
    ready = [c for c in s3_box_status_conditions if c['type'] == 'Ready']
    if not ready or ready[0]['conditionStatus'] != 'True':
        raise RelatedObjectNotReadyTemporaryError(f'Box {s3_box_name} not ready', delay=30)

    return s3_box_res


def _get_s3_params_from_box(co_api: CustomObjectsApi, s3_box_name, namespace):
    data = {}

    s3_box_res = _get_s3_box(co_api, s3_box_name, namespace, {'s3Storage', 's3DefaultStorage'})

    s3_storage_status = s3_box_res['status']['s3Storage']
    if ('csiS3PersistentVolumeClaimName' not in s3_storage_status) \
            or (not s3_storage_status['csiS3PersistentVolumeClaimName']):
        raise InputValidationPermanentError(f's3Storage {s3_box_name} csiS3PersistentVolumeClaimName not set')

    data['pvc_name'] = s3_storage_status['csiS3PersistentVolumeClaimName']
    return data


def _process_connected_boxes_section(co_api: CustomObjectsApi, namespace, connected_boxes_section):
    results = []
    for box in connected_boxes_section:
        data = {}
        box_name = box['name']
        data['connected_box_name'] = box_name
        if 'mountS3Box' in box:
            if 'path' not in box:
                raise InputValidationPermanentError('path is required for mountS3Box')
            data['mount_path'] = box['path']
            mount_s3_box = box['mountS3Box']
            s3_box_name = mount_s3_box['s3BoxName']
            data['sub_path'] = mount_s3_box.get('subPath', None)

            s3_params = _get_s3_params_from_box(co_api, s3_box_name, namespace)
            data |= s3_params
            results.append(('s3Box.mount', data))

    return results


get_functions_registry = {
    's3Box.mount': _get_csi_s3_pvcs
}


def get_volumes_specs(api_client: ApiClient, logger, namespace, connected_boxes_section):
    core_v1_api = CoreV1Api(api_client)
    co_api = CustomObjectsApi(api_client)
    volumes_specs_to_use = []
    connected_boxes = _process_connected_boxes_section(co_api, namespace, connected_boxes_section)
    for key, data in connected_boxes:
        if key not in get_functions_registry:
            logger.warning(f'get function for key {key} not registered')
            continue
        connected_box_name = data.pop('connected_box_name')
        get_func = get_functions_registry[key]
        volume_spec_to_use = get_func(core_v1_api=core_v1_api, logger=logger, namespace=namespace,
                                      connected_box_name=connected_box_name, **data)
        volumes_specs_to_use.append(volume_spec_to_use)
    return volumes_specs_to_use


def _get_s3_aws_creds_from_secret(core_v1_api: CoreV1Api, co_api: CustomObjectsApi, s3_box_name, namespace):
    s3_box_res = _get_s3_box(co_api, s3_box_name, namespace, {'s3Storage'})
    s3_storage = s3_box_res['spec']['s3Storage']
    if 'awsVarsS3Credentials' not in s3_storage:
        raise InputValidationPermanentError(f'awsVarsS3Credentials is expected, but not present '
                                            f'in box {s3_box_name}')
    s3_storage_secret_name = s3_storage['awsVarsS3Credentials']
    secret: V1Secret = core_v1_api.read_namespaced_secret(name=s3_storage_secret_name, namespace=namespace)
    credentials = secret.to_dict()['data']
    access_key_id = base64.b64decode(credentials['AWS_ACCESS_KEY_ID']).decode('utf-8')
    secret_access_key = base64.b64decode(credentials['AWS_SECRET_ACCESS_KEY']).decode('utf-8')
    return access_key_id, secret_access_key


def get_file_box_env(file_box_name,
                     connected_boxes_section) \
        -> list[MLComponentEnvVar]:
    # To support mount mode (only copy is implemented now) the implementation
    # could mirror get_model_storage_env.
    for box in connected_boxes_section:
        if box['name'] == file_box_name:
            if 'copyS3Box' not in box:
                raise InputValidationPermanentError('Only support of copyS3Box for fileExchange '
                                                    'is implemented now')
            api_files_s3_box = box['copyS3Box']
            s3_box_name = api_files_s3_box['s3BoxName']

            envs = [MLComponentEnvVar(name='FILES_S3_BOX_NAME',
                                      value=s3_box_name),
                    MLComponentEnvVar(name='FILES_S3_MODE',
                                      value='COPY')]
            return envs

    raise InputValidationPermanentError(f'Box with name {file_box_name} not found in connectedBoxes section')


def get_file_exchange_copy_flag(file_box_name, connected_boxes_section) -> bool:
    for box in connected_boxes_section:
        if box['name'] == file_box_name:
            if 'copyS3Box' not in box:
                raise InputValidationPermanentError('Only support of copyS3Box for fileExchange '
                                                    'is implemented now')
            # only copy mode is supported for now
            return True
    return False


def _get_s3_aws_creds_envs(core_v1_api: CoreV1Api, co_api: CustomObjectsApi, s3_box_name,
                           namespace, envs_prefix='MODEL_S3_'):
    envs = []
    access_key_id, secret_access_key = _get_s3_aws_creds_from_secret(core_v1_api, co_api, s3_box_name,
                                                                     namespace)
    envs.append(MLComponentEnvVar(name=envs_prefix + 'AWS_ACCESS_KEY_ID', value=access_key_id))
    envs.append(MLComponentEnvVar(name=envs_prefix + 'AWS_SECRET_ACCESS_KEY', value=secret_access_key))
    return envs


def _get_s3_config_envs(co_api: CustomObjectsApi, s3_box_name, namespace,
                        envs_prefix='MODEL_S3_'):
    s3_box_res = _get_s3_box(co_api, s3_box_name, namespace, {'s3Storage'})
    s3_storage = s3_box_res['spec']['s3Storage']
    config_data = {'host_port': s3_storage['host'],
                   'bucket_name': s3_storage['bucket']['name']}
    if 'subPath' in s3_storage['bucket']:
        config_data['bucket_sub_path'] = s3_storage['bucket']['subPath']

    envs = [MLComponentEnvVar(name=envs_prefix + 'HOST_PORT', value=s3_storage['host']),
            MLComponentEnvVar(name=envs_prefix + 'BUCKET_NAME', value=s3_storage['bucket']['name'])]
    return config_data, envs


def get_model_box_env(api_client: ApiClient, namespace,
                      model_box_name, connected_boxes_section) \
        -> list[MLComponentEnvVar]:
    core_v1_api = CoreV1Api(api_client)
    co_api = CustomObjectsApi(api_client)
    for box in connected_boxes_section:
        if box['name'] == model_box_name:
            if not any(supported_boxes := {'copyS3Box', 'mountS3Box'} & box.keys()):
                raise InputValidationPermanentError(f'No supported boxes {supported_boxes} found')
            if 'copyS3Box' in box:
                if 'path' not in box:
                    raise InputValidationPermanentError(f'path is expected, but not present '
                                                        f'in connected box {model_box_name}')
                envs = [MLComponentEnvVar(name='MODEL_S3_COPY_LOCAL_PATH', value=box['path']),
                        MLComponentEnvVar(name='MODEL_S3_MODE', value='COPY')]
                copy_s3_box = box['copyS3Box']
                s3_box_name = copy_s3_box['s3BoxName']
                envs += _get_s3_aws_creds_envs(core_v1_api, co_api, s3_box_name, namespace)

                config_data, config_envs = _get_s3_config_envs(co_api, s3_box_name,
                                                               namespace)
                envs += config_envs
                model_s3_copy_bucket_path = ''
                if 'subPath' in copy_s3_box:
                    model_s3_copy_bucket_path = copy_s3_box['subPath']
                if 'bucket_sub_path' in config_data:
                    model_s3_copy_bucket_path = os.path.join(config_data['bucket_sub_path'],
                                                             model_s3_copy_bucket_path)

                envs.append(MLComponentEnvVar(name='MODEL_S3_COPY_BUCKET_PATH', value=model_s3_copy_bucket_path))
                return envs
            if 'mountS3Box' in box:
                envs = [MLComponentEnvVar(name='MODEL_S3_MODE', value='MOUNT')]
                return envs

    raise InputValidationPermanentError(f'Box with name {model_box_name} not found in connectedBoxes section')


def get_download_model_flag(model_box_name, connected_boxes_section):
    for box in connected_boxes_section:
        if box['name'] == model_box_name:
            if 'copyS3Box' in box:
                return True
            return False
    return False
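For orientation (my sketch; field names are taken from the code above, box names are placeholders), a connectedBoxes section that _process_connected_boxes_section would turn into a single 's3Box.mount' entry might look like:

```
connected_boxes_section = [
    {
        'name': 'model-box',           # hypothetical connected box name
        'path': '/mnt/model',          # required for mountS3Box
        'mountS3Box': {
            's3BoxName': 'my-s3-box',  # hypothetical DataBox resource name
            'subPath': 'models/v1',    # optional
        },
    },
]
```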
617
controller/src/mlcmp/handlers.py
Normal file
@ -0,0 +1,617 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: MLComponent
# Authors: Polezhaev V.A., Khritankov A.S.
# Date created: 2024
# ============================================================
"""
MLComponent object module. Handles all actions with MLComponent Kubernetes objects.
"""

import base64
import dataclasses
from dataclasses import dataclass
from typing import Optional

import kopf
import yaml
from kubernetes.client import AppsV1Api, CoreV1Api, V1Secret, ApiException, V1ConfigMap
from packaging.specifiers import SpecifierSet, InvalidSpecifier

from basic_resources.network_policies import create_network_policy, delete_network_policy_if_exists
from bound import not_dev_namespace
from exceptions import InputValidationPermanentError
from kopf_k8s_client import api_client
from mlcmp.connected_boxes import get_volumes_specs, get_file_box_env, \
    get_model_box_env, get_download_model_flag, get_file_exchange_copy_flag, \
    MLComponentEnvVar
from mlcmp.jinja import jinja_env
from parse import extract_domain

ALLOW_ML_CMP_INGRESS_TRAFFIC_NP_TEMPLATE = 'allow-ml-cmp-ingress-traffic-np.yaml'
SERVICE_PORT = 80
FILES_CONFIG_MAP_NAME = 'files-cfg'

CONFIG_MAP_SUFFIX = '-cfg'
SECRET_SUFFIX = '-cred'

IS_GPU_AVAILABLE_TAINT_NAME = 'node.unified-platform.cs.hse.ru/is-gpu-available'
IS_INTERRUPTIBLE_TAINT_NAME = 'node.unified-platform.cs.hse.ru/is-interruptible'
GPU_LABEL_NAME = 'node.unified-platform.cs.hse.ru/resources.gpu'
MLCMP_NODE_LABEL_NAME = 'node.unified-platform.cs.hse.ru/components.mlcomponent'


@dataclass
class MLComponentEnvFromItem:
    key: str
    body: str


@dataclass
class MLComponentVolume:
    volume_name: str
    pvc_name: str


@dataclass
class MLComponentVolumeMount:
    volume_name: str
    mount_path: str
    sub_path: Optional[str]


@dataclass
class MLComponentResourceLimits:
    cpu: Optional[str]
    memory: Optional[str]
    gpu: Optional[str]


@dataclass
class MLComponentResourceRequests:
    cpu: Optional[str]
    memory: Optional[str]
    gpu: Optional[str]


@dataclass
class MLComponentToleration:
    key: str
    operator: str
    value: str
    effect: str


@dataclass
class MLComponentNodeAffinityExpression:
    key: str
    operator: str
    vals: list[str]


@dataclass
class MLComponentNodeAffinity:
    expressions: list[MLComponentNodeAffinityExpression]


@dataclass
class MLComponent:
    name: str
    namespace: str
    deployment_name: str
    service_name: str
    image_name: str
    python_package_registry_credentials: str
    python_package_registry_host: str
    python_package_version: Optional[str]
    container_port: int
    service_port: int
    env: list[MLComponentEnvVar]
    env_from: list[MLComponentEnvFromItem]
    image_registry_credentials_secret_name: str
    volumes: list[MLComponentVolume]
    volume_mounts: list[MLComponentVolumeMount]
    download_model: bool
    file_exchange_copy: bool
    resource_requests: Optional[MLComponentResourceRequests]
    resource_limits: Optional[MLComponentResourceLimits]
    tolerations: list[MLComponentToleration]
    node_affinity: MLComponentNodeAffinity


@dataclass
class AllowIngressTrafficNetworkPolicy:
    name: str
    namespace: str
    ml_component_name: str


def _get_deployment_name(name):
    return f'{name}-deployment'


def _get_service_name(name):
    return f'{name}-svc'


def _check_ml_component_is_ml_service(spec):
    if 'mlService' not in spec:
        raise InputValidationPermanentError('Only mlService is implemented now')


def _get_image_reg_and_image_name_frm_image_section(spec):
    image = spec['image']
    if 'existingImageName' not in image:
        raise InputValidationPermanentError('Only support of existing images is implemented now')

    image_name = image['existingImageName']
    image_registry_name = extract_domain(image_name)
    image_registry_cred_name = image_registry_name + SECRET_SUFFIX

    return image_registry_cred_name, image_name


def _get_env_from_model_section(namespace, model_section,
                                connected_boxes_section) \
        -> list[MLComponentEnvVar]:
    model_path = model_section['modelPath']
    model_path_env_var = MLComponentEnvVar(name='INFERENCE_MODEL_PATH', value=model_path)
    envs = [model_path_env_var]
    if 'modelKind' in model_section:
        model_kind = model_section['modelKind']
        model_kind_env_var = MLComponentEnvVar(name='INFERENCE_MODEL_KIND', value=model_kind)
        envs.append(model_kind_env_var)
    model_box_name = model_section['modelBox']
    if connected_boxes_section:
        envs += get_model_box_env(api_client, namespace, model_box_name, connected_boxes_section)
    return envs


def _get_env_from_file_exchange_section(file_exchange_section,
                                        connected_boxes_section) -> list[MLComponentEnvVar]:
    file_box_name = file_exchange_section['fileBox']
    inference_files_path = file_exchange_section['inferenceFilesPath']
    envs = [MLComponentEnvVar(name='FILES_S3_COPY_INFERENCE_FILES_DIR', value=inference_files_path)]
    if not connected_boxes_section:
        return envs
    envs += get_file_box_env(file_box_name=file_box_name,
                             connected_boxes_section=connected_boxes_section)
    return envs


def _get_file_exchange_copy_flag(file_exchange_section,
                                 connected_boxes_section) -> bool:
    file_box_name = file_exchange_section['fileBox']
    if connected_boxes_section:
        return get_file_exchange_copy_flag(file_box_name, connected_boxes_section)
    return False


def _get_env_from_entry_point_section(entry_point_section) -> list[MLComponentEnvVar]:
    envs = []
    if 'pythonPath' in entry_point_section:
        envs.append(MLComponentEnvVar(name='PYTHONPATH_EXTENSION', value=entry_point_section['pythonPath']))
    envs.append(MLComponentEnvVar(name='INFERENCE_FUNCTION', value=entry_point_section['pythonFunction']))
    return envs


def _get_env_from_env_section(spec) -> list[MLComponentEnvVar]:
    env_section = spec.get('env')
    envs = []
    if env_section:
        envs = [MLComponentEnvVar(name=env_var['name'], value=env_var['value']) for env_var in env_section]
    return envs


def _get_env_from_license_section(ml_service_section) -> list[MLComponentEnvVar]:
    license_section = ml_service_section.get('license')
    envs = []
    if license_section:
        if 'licenseLocalPath' in license_section:
            return [MLComponentEnvVar(name='LICENSE_LOCAL_PATH', value=license_section['licenseLocalPath'])]
    return envs


def _get_env_from_api_section(ml_service_section) -> list[MLComponentEnvVar]:
    api_section = ml_service_section.get('api')
    envs = []
    if api_section:
        if 'prefix' in api_section:
            return [MLComponentEnvVar(name='UNIP_MLCMP_PATH_PREFIX', value=api_section['prefix'])]
    return envs


def _get_spec_resource_limits(spec) -> dict:
    if 'resourceLimits' not in spec:
        raise InputValidationPermanentError("resourceLimits must be set")
    resource_limits: dict = spec['resourceLimits']
    return resource_limits


def _get_resource_limits(spec) -> Optional[MLComponentResourceLimits]:
    resource_limits = _get_spec_resource_limits(spec)
    if 'cpu' not in resource_limits and 'memory' not in resource_limits:
        raise InputValidationPermanentError("cpu and memory must be set")
    cpu = resource_limits.get('cpu', None)
    memory = resource_limits.get('memory', None)
    gpu = resource_limits.get('gpu', None)
    return MLComponentResourceLimits(cpu=cpu, memory=memory, gpu=gpu)


def _get_resource_requests(spec) -> Optional[MLComponentResourceRequests]:
    resource_limits = _get_spec_resource_limits(spec)
    gpu = resource_limits.get('gpu', None)
    return MLComponentResourceRequests(cpu="0m", memory=None, gpu=gpu)


def _get_node_affinity(spec) -> MLComponentNodeAffinity:
    resource_limits = _get_spec_resource_limits(spec)
    gpu = resource_limits.get('gpu', None)
    mlcmp_expr = MLComponentNodeAffinityExpression(key=MLCMP_NODE_LABEL_NAME,
                                                   operator='In',
                                                   vals=['true'])
    node_affinity = MLComponentNodeAffinity(expressions=[mlcmp_expr])

    if gpu:
        gpu_expr = MLComponentNodeAffinityExpression(key=GPU_LABEL_NAME, operator='In', vals=['true'])
        node_affinity.expressions.append(gpu_expr)

    return node_affinity


def _get_tolerations(spec) -> list[MLComponentToleration]:
    resource_limits = _get_spec_resource_limits(spec)
    gpu = resource_limits.get('gpu', None)

    tolerations = [MLComponentToleration(key=IS_INTERRUPTIBLE_TAINT_NAME,
                                         operator='Equal',
                                         value='true',
                                         effect='NoSchedule')]

    if gpu:
        tolerations.append(MLComponentToleration(
            key=IS_GPU_AVAILABLE_TAINT_NAME,
            operator='Equal',
            value='true',
            effect='NoSchedule'))

    return tolerations


def _get_env_from_from_env_from_section(spec) -> list[MLComponentEnvFromItem]:
    res = []
    env_from_section = spec.get('envFrom')
    if env_from_section is None:
        return res

    for elem in env_from_section:
        # elem: {'configMapRef': {'name': 'env-configmap'}}
        key_and_body = list(elem.items())
        # key_and_body: [('configMapRef', {'name': 'env-configmap'})]
        key = key_and_body[0][0]
        # key: "configMapRef"
        body = key_and_body[0][1]
        # body: {'name': 'env-configmap'}
        body_str = str(body).replace('{', '').replace('}', '').replace('\'', '')
        # body_str: "name: env-configmap"
        item = MLComponentEnvFromItem(key=key, body=body_str)
        res.append(item)
    return res


def _get_env_from_for_files_cm() -> list[MLComponentEnvFromItem]:
    key = "configMapRef"
    body = {'name': FILES_CONFIG_MAP_NAME}
    # body_str: "name: files-cfg"
    body_str = str(body).replace('{', '').replace('}', '').replace('\'', '')
    env_from = MLComponentEnvFromItem(key=key, body=body_str)
    return [env_from]


def _get_py_package_reg_cred_and_host(core_v1_api: CoreV1Api, spec, namespace):
    package_registry_name = spec['mlService']['packageRegistryName']
    package_registry_cred_name = package_registry_name + SECRET_SUFFIX
    package_registry_cm_name = package_registry_name + CONFIG_MAP_SUFFIX

    secret: V1Secret = core_v1_api.read_namespaced_secret(name=package_registry_cred_name, namespace=namespace)
    secret_body = secret.to_dict()
    credentials = base64.b64decode(secret_body['data']['credentials']).decode('utf-8')

    config_map: V1ConfigMap = core_v1_api.read_namespaced_config_map(name=package_registry_cm_name, namespace=namespace)
    config_map_body = config_map.to_dict()
    host = config_map_body['data']['host']

    return credentials, host


def _get_py_package_version_spec(spec, logger):
    ml_service = spec['mlService']
    if 'packageVersionSpec' not in ml_service:
        return None
    version_str = ml_service['packageVersionSpec']
    try:
        SpecifierSet(version_str)
    except InvalidSpecifier as exc:
        logger.error(exc)
        raise InputValidationPermanentError from exc
    return version_str


def _get_mlcmp_package_app_name_env() -> list[MLComponentEnvVar]:
    return [MLComponentEnvVar(name='MLCMP_PACKAGE_APP_NAME', value='unip.mlcmp.api:app')]


def _get_app_name_env(app_name) -> list[MLComponentEnvVar]:
    return [MLComponentEnvVar(name='UNIP_MLCMP_APP_NAME', value=app_name)]


def _get_ml_component_service_port(spec):
    ml_component_service_port = spec.get('servicePort')
    if not ml_component_service_port:
        raise InputValidationPermanentError(f"Service port must be set. Got {ml_component_service_port!r}.")
    return ml_component_service_port


def _get_volumes_and_volume_mounts(spec, namespace, logger) \
        -> tuple[list[MLComponentVolume], list[MLComponentVolumeMount]]:
    if 'connectedBoxes' not in spec:
        return [], []

    volumes = []
    volume_mounts = []
    volumes_specs_to_use = get_volumes_specs(api_client, logger, namespace, spec['connectedBoxes'])
    for volume_spec in volumes_specs_to_use:
        volumes.append(MLComponentVolume(volume_name=volume_spec['volume_in_spec_name'],
                                         pvc_name=volume_spec['pvc_name']))
        volume_mounts.append(MLComponentVolumeMount(volume_name=volume_spec['volume_in_spec_name'],
                                                    mount_path=volume_spec['mount_path'],
                                                    sub_path=volume_spec['sub_path']))
    return volumes, volume_mounts


def _create_deployment(apps_v1_api: AppsV1Api, namespace, manifest):
    apps_v1_api.create_namespaced_deployment(namespace=namespace, body=manifest)


def _create_service(core_v1_api: CoreV1Api, namespace, manifest):
    core_v1_api.create_namespaced_service(namespace=namespace, body=manifest)


def _delete_deployment_if_exists(apps_v1_api: AppsV1Api, name, namespace, logger):
    deployment_name = _get_deployment_name(name)
    try:
        apps_v1_api.delete_namespaced_deployment(name=deployment_name, namespace=namespace)
    except ApiException as exc:
        if exc.status == 404:
            logger.warning(f"Deployment {deployment_name} doesn't exist, do nothing")
            return
        raise exc
    logger.info(f"Deployment {deployment_name} is deleted")


def _delete_service_if_exists(core_v1_api: CoreV1Api, name, namespace, logger):
    service_name = _get_service_name(name)
    try:
        core_v1_api.delete_namespaced_service(name=service_name, namespace=namespace)
    except ApiException as exc:
        if exc.status == 404:
            logger.warning(f"Service {service_name} doesn't exist, do nothing")
            return
        raise exc
    logger.info(f"Service {service_name} is deleted")


def _get_download_model_flag(model_section, connected_boxes_section):
    model_box_name = model_section['modelBox']
    if connected_boxes_section:
        return get_download_model_flag(model_box_name, connected_boxes_section)
    return False


def _construct_ml_component(core_v1_api: CoreV1Api,
                            spec, name, namespace, logger) -> MLComponent:
    _check_ml_component_is_ml_service(spec)

    image_registry_cred_name, image_name = _get_image_reg_and_image_name_frm_image_section(spec)
    py_package_reg_cred, py_package_reg_host = _get_py_package_reg_cred_and_host(core_v1_api, spec, namespace)

    py_package_version = _get_py_package_version_spec(spec, logger)

    volumes, volume_mounts = _get_volumes_and_volume_mounts(spec, namespace, logger)

    ml_component_service_port = _get_ml_component_service_port(spec)

    env = _get_env_from_model_section(namespace, spec['mlService']['inference']['model'],
                                      spec.get('connectedBoxes'))

    download_model = _get_download_model_flag(spec['mlService']['inference']['model'], spec.get('connectedBoxes'))

    file_exchange_copy = _get_file_exchange_copy_flag(spec['mlService']['inference']['fileExchange'],
                                                      spec.get('connectedBoxes'))

    env += _get_env_from_file_exchange_section(spec['mlService']['inference']['fileExchange'],
                                               spec.get('connectedBoxes'))
    env += _get_env_from_entry_point_section(spec['mlService']['inference']['entryPoint'])

    env += _get_env_from_license_section(spec['mlService'])

    env += _get_env_from_api_section(spec['mlService'])

    env += _get_app_name_env(namespace)

    env += _get_env_from_env_section(spec)

    env += _get_mlcmp_package_app_name_env()

    env_from = _get_env_from_for_files_cm()

    env_from += _get_env_from_from_env_from_section(spec)

    deployment_name = _get_deployment_name(name)

    service_name = _get_service_name(name)

    resource_limits = _get_resource_limits(spec)
    resource_requests = _get_resource_requests(spec)

    node_affinity = _get_node_affinity(spec)
    tolerations = _get_tolerations(spec)

    service_port = SERVICE_PORT

    ml_component = MLComponent(name=name, namespace=namespace, deployment_name=deployment_name, image_name=image_name,
                               python_package_registry_credentials=py_package_reg_cred,
                               python_package_registry_host=py_package_reg_host,
                               python_package_version=py_package_version,
                               container_port=ml_component_service_port,
                               env=env, env_from=env_from,
                               image_registry_credentials_secret_name=image_registry_cred_name,
                               volumes=volumes, volume_mounts=volume_mounts, service_name=service_name,
                               service_port=service_port, download_model=download_model,
                               file_exchange_copy=file_exchange_copy,
                               resource_limits=resource_limits,
                               resource_requests=resource_requests,
                               tolerations=tolerations,
                               node_affinity=node_affinity)

    return ml_component


def _construct_deployment_manifest(ml_component: MLComponent):
    template = jinja_env.get_template('ml-component-deployment.yaml')
    text = template.render(dataclasses.asdict(ml_component))
    data = yaml.safe_load(text)
    return data


def _construct_service_manifest(ml_component: MLComponent):
    template = jinja_env.get_template('ml-component-svc.yaml')
    text = template.render(dataclasses.asdict(ml_component))
    data = yaml.safe_load(text)
    return data


def _create_ml_component_deployment(core_v1_api, apps_v1_api, spec, name, namespace, logger):
    ml_component = _construct_ml_component(core_v1_api, spec, name, namespace, logger)
    deployment_manifest = _construct_deployment_manifest(ml_component)
    _create_deployment(apps_v1_api, namespace, deployment_manifest)


def _create_ml_component_service(core_v1_api, spec, name, namespace, logger):
    ml_component = _construct_ml_component(core_v1_api, spec, name, namespace, logger)
    service_manifest = _construct_service_manifest(ml_component)
    _create_service(core_v1_api, namespace, service_manifest)


def _get_allow_ingress_traffic_np_name(name):
    return f'ingress-{name}-traffic'


def _construct_allow_ingress_traffic_np_manifest(np: AllowIngressTrafficNetworkPolicy):
    template = jinja_env.get_template(ALLOW_ML_CMP_INGRESS_TRAFFIC_NP_TEMPLATE)
    text = template.render(dataclasses.asdict(np))
    data = yaml.safe_load(text)
    return data


def _create_allow_ingress_traffic_np(name, namespace, logger):
    np_name = _get_allow_ingress_traffic_np_name(name)
    np = AllowIngressTrafficNetworkPolicy(name=np_name, namespace=namespace, ml_component_name=name)
    manifest = _construct_allow_ingress_traffic_np_manifest(np)
    create_network_policy(api_client=api_client, manifest=manifest, namespace=namespace,
                          logger=logger)


def _delete_allow_ingress_traffic_np(name, namespace, logger):
    np_name = _get_allow_ingress_traffic_np_name(name)
    delete_network_policy_if_exists(api_client=api_client, np_name=np_name, namespace=namespace,
                                    logger=logger)


@kopf.on.create('unified-platform.cs.hse.ru', 'mlcomponents',
                when=not_dev_namespace,
                retries=12)
def create_ml_component(spec, name, namespace, body, logger, **_):
    """
    Handler for MLComponent object creation event.

    Creates service, deployment and service exposing network policy.

    Implements the create-subhandler pattern.
    Creates subobjects via subhandlers in sequential order. Retries failed subhandlers, but does not repeat
    succeeded ones.
    In case of kopf failure, an inconsistent state can be fixed via an MLComponent resource delete followed by a
    repeated create.

    :param spec: spec from MLComponent resource
    :param name: name from MLComponent resource
    :param namespace: namespace from MLComponent resource
    :param body: body from MLComponent resource
    :param logger: kopf logger
    :param _: other parameters passed by kopf which are of no interest
    :return: None
    """
    logger.debug(f"A create_ml_component handler is called with body: {body}")

    apps_v1_api = AppsV1Api(api_client)
    core_v1_api = CoreV1Api(api_client)

    @kopf.subhandler(id=f'create-deployment-{name}', retries=3)
    def _create_deployment_handler(**_):
        _create_ml_component_deployment(core_v1_api, apps_v1_api, spec, name, namespace, logger)

    @kopf.subhandler(id=f'create-service-{name}', retries=3)
    def _create_service_handler(**_):
        _create_ml_component_service(core_v1_api, spec, name, namespace, logger)

    @kopf.subhandler(id='create-allow-ingress-traffic-network-policy', retries=3)
    def _create_allow_ingress_traffic_np_handler(**__):
        _create_allow_ingress_traffic_np(name=name, namespace=namespace,
                                         logger=logger)


@kopf.on.delete('unified-platform.cs.hse.ru', 'mlcomponents',
                retries=5,
                when=not_dev_namespace)
def delete_ml_component(name, namespace, logger, **_):
    """
    Handler for MLComponent object deletion event.

    Deletes service, deployment and service exposing network policy.

    Implements the delete pattern.
    Deletes subobjects in sequential order (without subhandlers). Retries the whole handler (deletion of all
    subobjects) in case of error, but already deleted objects are skipped, hence no error is raised.

    The implementation is idempotent. In case of kopf failure, an inconsistent state can be fixed via a repeated
    delete.

    :param name: name of MLComponent object
    :param namespace: namespace of MLComponent object
    :param logger: kopf logger
    :param _: other parameters passed by kopf which are of no interest
    :return: None
    """
    logger.debug(f"A delete_ml_component handler is called for namespace.name: {namespace}.{name}")

    apps_v1_api = AppsV1Api(api_client)
    core_v1_api = CoreV1Api(api_client)

    _delete_deployment_if_exists(apps_v1_api, name, namespace, logger)
    _delete_service_if_exists(core_v1_api, name, namespace, logger)
    _delete_allow_ingress_traffic_np(name, namespace, logger)


@kopf.on.update('unified-platform.cs.hse.ru', 'mlcomponents',
                retries=3,
                when=not_dev_namespace)
def update_ml_component(spec, name, namespace, body, logger, **_):
    """
    Handler for MLComponent object update event.

    Deletes the existing deployment and re-creates it from the updated spec;
    the service and the network policy are left in place.
    """
    apps_v1_api = AppsV1Api(api_client)
    core_v1_api = CoreV1Api(api_client)

    _delete_deployment_if_exists(apps_v1_api, name, namespace, logger)

    @kopf.subhandler(id=f'create-deployment-{name}', retries=3)
    def _create_deployment_handler(**_):
        _create_ml_component_deployment(core_v1_api, apps_v1_api, spec, name, namespace, logger)
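To make the handler's expectations concrete (my sketch; every name below is a placeholder, while the field paths are exactly those read by _construct_ml_component above):

```
spec = {
    'mlService': {
        'packageRegistryName': 'pypi-internal',      # resolves pypi-internal-cred / pypi-internal-cfg
        'packageVersionSpec': '>=1.0,<2.0',          # optional, validated as a PEP 440 specifier
        'inference': {
            'model': {'modelPath': '/models/m1', 'modelBox': 'model-box'},
            'fileExchange': {'fileBox': 'files-box', 'inferenceFilesPath': '/files'},
            'entryPoint': {'pythonFunction': 'pkg.infer:predict'},
        },
    },
    'image': {'existingImageName': 'registry.example.com/mlcmp/image:1.0'},
    'servicePort': 8000,
    'resourceLimits': {'cpu': '1', 'memory': '1Gi'},  # gpu is optional
    'connectedBoxes': [],                             # see connected_boxes.py above
}
```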
20
controller/src/mlcmp/jinja.py
Normal file
@ -0,0 +1,20 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: MLComponent
# Authors: Polezhaev V.A., Khritankov A.S.
# Date created: 2024
# ============================================================
"""
MLComponent object jinja module.
"""

from jinja2 import Environment, FileSystemLoader

jinja_env = Environment(
    loader=FileSystemLoader('templates/ml-component'),
    lstrip_blocks=True,
    trim_blocks=True
)
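A short sketch of how this environment is used by handlers.py above (the template name is real; `ml_component` stands in for an MLComponent instance):

```
import dataclasses

import yaml

from mlcmp.jinja import jinja_env

# handlers.py renders dataclasses.asdict(ml_component) through a template
# and feeds the YAML text to the Kubernetes client as a dict manifest.
template = jinja_env.get_template('ml-component-svc.yaml')
manifest = yaml.safe_load(template.render(dataclasses.asdict(ml_component)))
```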
20
controller/src/parse.py
Normal file
@ -0,0 +1,20 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: Utilities
# Authors: Polezhaev V.A., Khritankov A.S.
# Date created: 2024
# ============================================================
import posixpath


def extract_domain(url: str) -> str:
    # Repeatedly strip the last path segment until only the first one
    # (the registry domain) is left.
    prev = rest = url
    while rest != '' and rest != '/':
        prev = rest
        rest, _ = posixpath.split(rest)
    return prev


def get_user_namespace(app_namespace: str):
    # Assumes app namespaces are named '<user>-pa-<app>'.
    user_namespace = app_namespace[:app_namespace.find('-pa-')]
    return user_namespace
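Expected behavior, for reference (my examples, computed from the code above):

```
>>> extract_domain('registry.example.com/group/image')
'registry.example.com'
>>> get_user_namespace('alice-pa-myapp')
'alice'
```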
0
controller/src/platform_app/__init__.py
Normal file
20
controller/src/platform_app/api.py
Normal file
@ -0,0 +1,20 @@
# ============================================================
# System: Unified Library, AI Center, HSE University
# Module: PlatformApp
# Authors: Polezhaev V.A., Khritankov A.S.
# Date created: 2024
# ============================================================
from kubernetes.client import ApiClient

from platform_app.basic_resources import copy_secret, SECRET_SUFFIX


def create_apis_secrets(api_client: ApiClient, spec, user_namespace, secret_namespace, logger):
    if 'apisCredentials' in spec:
        for api_cred in spec['apisCredentials']:
            api_cred_name = api_cred['name']
            new_secret_name = api_cred_name + SECRET_SUFFIX
            secret_ref = api_cred['secretRef']
            secret_name = secret_ref['name']
            copy_secret(api_client, secret_name, user_namespace,
                        new_secret_name, secret_namespace, logger)
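A hedged sketch of the spec shape this function consumes (names are placeholders; field paths match the code above):

```
spec = {
    'apisCredentials': [
        {
            'name': 'weather-api',                     # copied as secret 'weather-api-cred'
            'secretRef': {'name': 'weather-api-src'},  # source secret in the user namespace
        },
    ],
}
```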
Some files were not shown because too many files have changed in this diff.