# File: unip-controller/controller/templates/experiment-pipeline/simple-pipeline-trial.yaml
# Repository view timestamp: 2025-01-29 13:13:51 +00:00
# ============================================================
# Система: Единая библиотека, Центр ИИ НИУ ВШЭ
# Модуль: ExperimentPipeline
# Авторы: Полежаев В.А., Хританков А.С.
# Дата создания: 2024 г.
# ============================================================
# Kubernetes Job manifest rendered from a Jinja2 template.
# Template variables used in this section: job_name, namespace, annotations, labels.
# Jinja block tags are kept at column 0 so that the indentation of the rendered
# YAML does not depend on the template environment's whitespace settings.
apiVersion: batch/v1
kind: Job
metadata:
  # Quoted so that all-digit or boolean-looking names stay strings.
  name: "{{ job_name }}"
  namespace: "{{ namespace }}"
{% if annotations %}
  # Optional annotations; every value is emitted as a double-quoted string.
  annotations:
{% for key, value in annotations.items() %}
    {{ key }}: "{{ value }}"
{% endfor %}
{% endif %}
{% if labels %}
  # Optional labels applied to the Job object itself.
  labels:
{% for key, value in labels.items() %}
    {{ key }}: "{{ value }}"
{% endfor %}
{% endif %}
# Job spec: one completion, up to three retries, pod template follows.
spec:
  completions: 1
  backoffLimit: 3
  template:
{% if labels %}
    # The same labels mapping used for the Job is copied onto the pod template.
    metadata:
      labels:
{% for key, value in labels.items() %}
        {{ key }}: "{{ value }}"
{% endfor %}
{% endif %}
    spec:
      # Containers are never restarted in place.
      restartPolicy: Never
{% if stages %}
      # One init container is rendered per entry of `stages`, in list order.
      # The key is omitted entirely when `stages` is empty, instead of
      # rendering a bare `initContainers:` (null).
      # Fields read from each stage: name, image, cmd, volume_mounts,
      # resource_limits, resource_requests, env, env_from.
      # NOTE(review): command lines, images and paths are inserted as plain
      # (unquoted) scalars; callers presumably pass values that are already
      # YAML-safe - confirm.
      # NOTE(review): each env_from element's `body` is inserted verbatim under
      # its `key`; it presumably renders as a YAML/flow mapping such as a
      # configMapRef or secretRef body - confirm against the caller.
      initContainers:
{% for stage in stages %}
        - name: {{ stage.name }}
          image: {{ stage.image }}
{% if stage.cmd %}
          command:
{% for line in stage.cmd %}
            - {{ line }}
{% endfor %}
{% endif %}
{% if stage.volume_mounts %}
          volumeMounts:
{% for volume_mount in stage.volume_mounts %}
            - name: {{ volume_mount['volume_name'] }}
              mountPath: {{ volume_mount['mount_path'] }}
{% if volume_mount['read_only'] %}
              readOnly: true
{% endif %}
{% if volume_mount['sub_path'] %}
              subPath: {{ volume_mount['sub_path'] }}
{% endif %}
{% endfor %}
{% endif %}
{% if stage.resource_limits or stage.resource_requests %}
          resources:
{% if stage.resource_limits %}
            limits:
{% if stage.resource_limits.memory %}
              memory: {{ stage.resource_limits.memory }}
{% endif %}
{% if stage.resource_limits.cpu %}
              cpu: {{ stage.resource_limits.cpu }}
{% endif %}
{% if stage.resource_limits.gpu %}
              nvidia.com/gpu: {{ stage.resource_limits.gpu }}
{% endif %}
{% endif %}
{% if stage.resource_requests %}
            requests:
{% if stage.resource_requests.memory %}
              memory: {{ stage.resource_requests.memory }}
{% endif %}
{% if stage.resource_requests.cpu %}
              cpu: {{ stage.resource_requests.cpu }}
{% endif %}
{% if stage.resource_requests.gpu %}
              nvidia.com/gpu: {{ stage.resource_requests.gpu }}
{% endif %}
{% endif %}
{% endif %}
{% if stage.env %}
          env:
{% for env_var in stage.env %}
            - name: {{ env_var['name'] }}
              value: "{{ env_var['value'] }}"
{% endfor %}
{% endif %}
{% if stage.env_from %}
          envFrom:
{% for env_from_elem in stage.env_from %}
            - {{ env_from_elem['key'] }}:
                {{ env_from_elem['body'] }}
{% endfor %}
{% endif %}
{% endfor %}
{% endif %}
# Main container of the pod. When `validation` is truthy it runs the
# exp_pipeline.results.main module; otherwise it only prints a notice.
# NOTE(review): `image` is read from validation.image_name outside the
# `if validation` guard, so it is rendered even when the else-branch is taken;
# confirm the caller always supplies an image, otherwise it renders empty.
      containers:
        - name: validate-trial-results
          image: {{ validation.image_name }}
{% if validation %}
          command: ["python3", "-m"]
          args:
            - exp_pipeline.results.main
{% if validation.volume_mounts %}
          volumeMounts:
{% for volume_mount in validation.volume_mounts %}
            - name: {{ volume_mount['volume_name'] }}
              mountPath: {{ volume_mount['mount_path'] }}
{% if volume_mount['read_only'] %}
              readOnly: true
{% endif %}
{% if volume_mount['sub_path'] %}
              subPath: {{ volume_mount['sub_path'] }}
{% endif %}
{% endfor %}
{% endif %}
{% if validation.resource_limits %}
          # Only limits are rendered for the validation container; no requests.
          resources:
            limits:
{% if validation.resource_limits.memory %}
              memory: {{ validation.resource_limits.memory }}
{% endif %}
{% if validation.resource_limits.cpu %}
              cpu: {{ validation.resource_limits.cpu }}
{% endif %}
{% if validation.resource_limits.gpu %}
              nvidia.com/gpu: {{ validation.resource_limits.gpu }}
{% endif %}
{% endif %}
{% if validation.env %}
          env:
{% for env_var in validation.env %}
            - name: {{ env_var['name'] }}
              value: "{{ env_var['value'] }}"
{% endfor %}
{% endif %}
{% if validation.env_from %}
          envFrom:
{% for env_from_elem in validation.env_from %}
            - {{ env_from_elem['key'] }}:
                {{ env_from_elem['body'] }}
{% endfor %}
{% endif %}
{% else %}
          command: ["python3", "-c"]
          args:
            - print('Results validation disabled')
{% endif %}
{% if volumes %}
      # Pod-level volumes. Each entry names a volume and may be backed by a
      # PVC (pvc_name), a Secret (secret_name) or a ConfigMap (configmap_name).
      # Secret and ConfigMap volumes optionally project selected keys through
      # `volume_items` (key/path pairs).
      # NOTE(review): the three source checks are independent `if`s, so an entry
      # with more than one source name set renders several volume sources -
      # presumably the caller sets exactly one; confirm.
      volumes:
{% for volume in volumes %}
        - name: {{ volume['volume_name'] }}
{% if volume['pvc_name'] %}
          persistentVolumeClaim:
            claimName: {{ volume['pvc_name'] }}
{% endif %}
{% if volume['secret_name'] %}
          secret:
            secretName: {{ volume['secret_name'] }}
{% if volume['volume_items'] %}
            items:
{% for vi in volume['volume_items'] %}
              - key: "{{ vi.key }}"
                path: "{{ vi.path }}"
{% endfor %}
{% endif %}
{% endif %}
{% if volume['configmap_name'] %}
          configMap:
            name: {{ volume['configmap_name'] }}
{% if volume['volume_items'] %}
            items:
{% for vi in volume['volume_items'] %}
              - key: "{{ vi.key }}"
                path: "{{ vi.path }}"
{% endfor %}
{% endif %}
{% endif %}
{% endfor %}
{% endif %}
{% if image_reg_creds %}
      # Registry credentials: one imagePullSecrets entry per secret name in
      # `image_reg_creds`.
      # NOTE(review): imagePullPolicy is rendered here at pod-spec level, next to
      # imagePullSecrets. The Kubernetes API defines imagePullPolicy on
      # containers, so at this position it is presumably ignored or rejected by
      # the API server - confirm, and move it into the container entries if
      # "always pull" is the intended behaviour.
      imagePullPolicy: Always
      imagePullSecrets:
{% for secret_name in image_reg_creds %}
        - name: {{ secret_name }}
{% endfor %}
{% endif %}
{% if tolerations %}
      # Pod tolerations; all four fields of every entry are emitted as quoted
      # strings, so a missing attribute renders as an empty string.
      tolerations:
{% for tol in tolerations %}
        - key: "{{ tol.key }}"
          operator: "{{ tol.operator }}"
          value: "{{ tol.value }}"
          effect: "{{ tol.effect }}"
{% endfor %}
{% endif %}
{% if node_selector %}
      # Node selector labels; values are quoted so they are always strings.
      nodeSelector:
{% for key, value in node_selector.items() %}
        {{ key }}: "{{ value }}"
{% endfor %}
{% endif %}
{% if node_affinity %}
      # Required node affinity: a single nodeSelectorTerm whose matchExpressions
      # are taken from node_affinity.expressions (key, operator, vals).
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
{% for e in node_affinity.expressions %}
                  - key: {{ e.key }}
                    operator: {{ e.operator }}
                    values:
{% for v in e.vals %}
                      - "{{ v }}"
{% endfor %}
{% endfor %}
{% endif %}