พัฒนา Backend (เพิ่ม Health Check Endpoint)
This commit is contained in:
parent
16dd285bab
commit
a4b5dcf110
0
backend/api/services/__init__.py
Normal file
0
backend/api/services/__init__.py
Normal file
186
backend/api/services/health_service.py
Normal file
186
backend/api/services/health_service.py
Normal file
@ -0,0 +1,186 @@
|
|||||||
|
import os
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
from django.db import connection, Error as DjangoDBError
|
||||||
|
from django.core.cache import cache
|
||||||
|
from django.conf import settings
|
||||||
|
import boto3
|
||||||
|
from botocore.client import Config
|
||||||
|
from botocore.exceptions import ClientError
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
# Import the Model Registry repository and the AiModel model
|
||||||
|
from model_registry.repositories.ai_model_repository import AiModelRepository
|
||||||
|
from model_registry.models import AiModel
|
||||||
|
|
||||||
|
# Module-level AiModelRepository instance; HealthService._check_all_ai_models uses it to list models.
|
||||||
|
ai_model_repo = AiModelRepository()
|
||||||
|
|
||||||
|
# Base URL of the AI service, read from AI_SERVICE_INTERNAL_URL; falls back to localhost:8001 for local development.
|
||||||
|
AI_SERVICE_URL = os.getenv("AI_SERVICE_INTERNAL_URL", "http://localhost:8001")
|
||||||
|
|
||||||
|
# Health-check-specific exception type, defined locally in this module
|
||||||
|
class HealthCheckError(Exception):
    """Exception type reserved for health-check failures.

    NOTE(review): nothing in the visible part of this module raises or
    catches it; the individual checks report failures through their
    ``("DOWN", details)`` return value instead.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class HealthService:
    """Run dependency health checks and aggregate them into one report.

    Every ``_check_*`` method for a single component returns a
    ``(status, details)`` tuple where ``status`` is ``"UP"`` or ``"DOWN"``
    and ``details`` is a human-readable message. The checks catch their own
    exceptions and report them as ``"DOWN"`` rather than raising.
    """

    def __init__(self):
        """No per-instance state is needed."""
        pass

    def _check_database(self):
        """Check the database by executing ``SELECT 1`` on the default connection.

        Returns:
            ``("UP", message_with_latency)`` on success, otherwise
            ``("DOWN", error_message)``.
        """
        # perf_counter() is monotonic, so the measured latency cannot be
        # skewed (or go negative) when the wall clock is adjusted.
        started = time.perf_counter()
        try:
            with connection.cursor() as cursor:
                cursor.execute("SELECT 1")
            latency = round((time.perf_counter() - started) * 1000, 2)
            return "UP", f"Query executed successfully. Latency: {latency}ms"
        except DjangoDBError as e:
            return "DOWN", f"DB Connection Error: {e}"
        except Exception as e:
            return "DOWN", f"Unknown DB Error: {e}"

    def _check_cache(self):
        """Check the cache backend with a write followed by a read.

        Returns:
            ``("UP", message_with_latency)`` when the value written is read
            back, otherwise ``("DOWN", error_message)``.
        """
        started = time.perf_counter()
        test_key = 'health_test_key'
        try:
            # Short timeout so the probe key does not linger in the cache.
            cache.set(test_key, 'ok', timeout=1)
            result = cache.get(test_key)
            latency = round((time.perf_counter() - started) * 1000, 2)
            if result == 'ok':
                return "UP", f"Read/Write successful. Latency: {latency}ms"
            return "DOWN", "Failed to retrieve test key."
        except Exception as e:
            return "DOWN", f"Redis Error: {e}"

    def _check_minio(self):
        """Check the object storage (MinIO) bucket through boto3.

        Connection settings come from ``MINIO_ENDPOINT``,
        ``MINIO_ACCESS_KEY``, ``MINIO_SECRET_KEY`` and ``MODEL_BUCKET``.

        Returns:
            ``("UP", message)`` when ``head_bucket`` succeeds, otherwise
            ``("DOWN", error_message)``.
        """
        # Resolved before the try block so every error branch can name it.
        bucket_name = os.getenv("MODEL_BUCKET", "models")
        try:
            # NOTE(review): the fallback credentials below are hard-coded in
            # source; consider requiring the environment variables instead.
            s3_client = boto3.client(
                "s3",
                endpoint_url=os.getenv("MINIO_ENDPOINT", "http://localhost:9000"),
                aws_access_key_id=os.getenv("MINIO_ACCESS_KEY", "minio_admin"),
                aws_secret_access_key=os.getenv("MINIO_SECRET_KEY", "minio_p@ssw0rd!"),
                # Explicit timeouts keep the health check from hanging.
                config=Config(signature_version="s3v4", connect_timeout=5, read_timeout=10),
            )

            # head_bucket is cheaper than listing buckets; it raises
            # ClientError when the bucket is missing or access is denied.
            s3_client.head_bucket(Bucket=bucket_name)

            return "UP", f"Bucket '{bucket_name}' accessible via boto3."

        except ClientError as e:
            error_code = e.response['Error']['Code']
            if error_code == '404':
                return "DOWN", f"Bucket '{bucket_name}' not found."
            if error_code == '403':
                return "DOWN", "MinIO S3 Access Denied. Check Key/Secret."
            return "DOWN", f"MinIO S3 Error (Code {error_code}): {e}"
        except Exception as e:
            return "DOWN", f"MinIO Connection Error: {e}"

    def _check_ai_service(self):
        """Check the AI service at ``AI_SERVICE_URL``.

        The service counts as up only when it answers with a success status
        and its JSON body has a truthy ``model_loaded`` field.

        Returns:
            ``("UP", message_with_latency)`` or ``("DOWN", error_message)``.
        """
        started = time.perf_counter()
        try:
            response = requests.get(AI_SERVICE_URL, timeout=5)
            response.raise_for_status()

            data = response.json()
            model_loaded = data.get("model_loaded", False)

            latency = round((time.perf_counter() - started) * 1000, 2)

            if model_loaded:
                return "UP", f"Service running & Model loaded. Latency: {latency}ms"
            return "DOWN", f"Service running, but Model load status: {model_loaded}. Latency: {latency}ms"

        except requests.exceptions.RequestException as e:
            return "DOWN", f"FastAPI/MONAI Service Unreachable: {e}"
        except Exception as e:
            return "DOWN", f"AI Service Check Failed: {e}"

    # -----------------------------------------------
    # Health checks for the individual AI models
    # -----------------------------------------------
    def _check_single_ai_model(self, model: "AiModel"):
        """Call the health endpoint of a single model.

        The health URL is the model's ``full_inference_url()`` with any
        trailing slash removed and ``/health`` appended.

        Args:
            model: The model whose endpoint is probed.

        Returns:
            ``("UP", message_with_latency)`` when the endpoint answers with a
            success status, otherwise ``("DOWN", error_message)``.
        """
        health_url = model.full_inference_url().rstrip('/') + '/health'
        started = time.perf_counter()

        try:
            response = requests.get(health_url, timeout=5)
            response.raise_for_status()  # raises for 4xx/5xx responses
            latency = round((time.perf_counter() - started) * 1000, 2)
            return "UP", f"Model Health Check successful. Latency: {latency}ms"

        except requests.exceptions.RequestException as e:
            return "DOWN", f"Health Check Failed at {health_url}: {e}"
        except Exception as e:
            return "DOWN", f"Check Failed: {e}"

    def _check_all_ai_models(self):
        """Check every model whose status is ``ACTIVE``.

        Returns:
            ``(overall_status, model_statuses)``. ``overall_status`` is
            ``"UP"`` only when every checked model is up; with no active
            models the list is empty and the overall status is ``"UP"``.

        Raises:
            Whatever the repository query or ``full_inference_url()`` raises;
            this method does not catch those errors itself.
        """
        active_models = ai_model_repo.get_all().filter(status='ACTIVE')
        model_statuses = []

        for model in active_models:
            status, detail = self._check_single_ai_model(model)
            model_statuses.append({
                "name": f"{model.name} (v{model.model_version})",
                # Sent as a string so large ids are not truncated by JavaScript clients.
                "id": str(model.id),
                "status": status,
                "endpoint": model.full_inference_url(),
                "details": detail,
            })

        overall_model_health = "UP" if all(m['status'] == "UP" for m in model_statuses) else "DOWN"

        return overall_model_health, model_statuses

    def get_system_health(self):
        """Run all checks and combine them into a single report.

        Returns:
            A dict with ``status`` (``"Healthy"`` when every component is
            ``"UP"``, otherwise ``"Degraded"``), ``services`` (one entry per
            component) and ``last_checked`` (UTC timestamp, ISO 8601).
        """
        results = {}

        # The model check queries the model registry, so it can raise (for
        # example when the database is down). Report that as a DOWN entry
        # instead of aborting: the remaining checks must still run so the
        # report shows which dependency actually failed.
        try:
            ai_overall_status, model_details = self._check_all_ai_models()
            results['model_endpoints'] = {
                "name": "AI Model Endpoints",
                "status": ai_overall_status,
                "models": model_details,
            }
        except Exception as e:
            results['model_endpoints'] = {
                "name": "AI Model Endpoints",
                "status": "DOWN",
                "models": [],
                "details": f"Model registry check failed: {e}",
            }

        # (result key, display name, check) for each infrastructure component.
        component_checks = (
            ("database", "CockroachDB", self._check_database),
            ("cache", "Redis Cache", self._check_cache),
            ("storage", "MinIO S3", self._check_minio),
            ("ai_service", "MONAI FastAPI", self._check_ai_service),
        )
        for key, display_name, run_check in component_checks:
            check_status, check_details = run_check()
            results[key] = {
                "name": display_name,
                "status": check_status,
                "details": check_details,
            }

        all_up = all(entry["status"] == 'UP' for entry in results.values())

        return {
            "status": "Healthy" if all_up else "Degraded",
            "services": results,
            "last_checked": datetime.now(timezone.utc).isoformat(),
        }
|
||||||
0
backend/api/views/__init__.py
Normal file
0
backend/api/views/__init__.py
Normal file
36
backend/api/views/health_check_view.py
Normal file
36
backend/api/views/health_check_view.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
from rest_framework.views import APIView
|
||||||
|
from rest_framework.response import Response
|
||||||
|
from rest_framework import status, permissions
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
# Import Service Layer
|
||||||
|
from api.services.health_service import HealthService
|
||||||
|
|
||||||
|
# Module-level HealthService instance used by SystemHealthCheck.get below.
|
||||||
|
health_service = HealthService()
|
||||||
|
|
||||||
|
class SystemHealthCheck(APIView):
    """
    GET /api/v1/health/

    Controller that returns the system health report produced by the
    service layer.
    """
    # NOTE(review): the endpoint is open to unauthenticated callers and the
    # payload (including the error branch below) echoes exception text;
    # confirm that exposing these details publicly is intended.
    permission_classes = [permissions.AllowAny]

    def get(self, request):
        """Return the health report.

        Responds with 200 when the overall status is ``"Healthy"``, 503 for
        any other overall status, and 500 when building the report raises.
        """
        try:
            # Delegate to the service layer.
            response_data = health_service.get_system_health()

            # Map the overall status onto the HTTP status code.
            http_status = status.HTTP_200_OK
            if response_data['status'] != "Healthy":
                http_status = status.HTTP_503_SERVICE_UNAVAILABLE

            return Response(response_data, status=http_status)

        except Exception as e:
            # Last-resort handler for unexpected errors at the view level.
            return Response(
                {"status": "Error", "detail": f"Internal Server Error during health check: {e}"},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            )
|
||||||
@ -21,6 +21,9 @@ from rest_framework.routers import DefaultRouter
|
|||||||
from model_registry.views.ai_model_viewset import AiModelRegistryViewSet
|
from model_registry.views.ai_model_viewset import AiModelRegistryViewSet
|
||||||
from drf_spectacular.views import SpectacularAPIView, SpectacularSwaggerView, SpectacularRedocView
|
from drf_spectacular.views import SpectacularAPIView, SpectacularSwaggerView, SpectacularRedocView
|
||||||
|
|
||||||
|
# Import Health Check View ในแอพ /api
|
||||||
|
from api.views.health_check_view import SystemHealthCheck
|
||||||
|
|
||||||
# 1. กำหนดตัวแปร router ก่อนใช้งาน
|
# 1. กำหนดตัวแปร router ก่อนใช้งาน
|
||||||
router = DefaultRouter()
|
router = DefaultRouter()
|
||||||
|
|
||||||
@ -49,6 +52,9 @@ urlpatterns = [
|
|||||||
path('api/v1/auth/', include('djoser.urls')), # /users/ (Register/Update/Me), /users/set_password
|
path('api/v1/auth/', include('djoser.urls')), # /users/ (Register/Update/Me), /users/set_password
|
||||||
path('api/v1/auth/', include('djoser.urls.jwt')), # /jwt/create (Login), /jwt/refresh (Refresh Token)
|
path('api/v1/auth/', include('djoser.urls.jwt')), # /jwt/create (Login), /jwt/refresh (Refresh Token)
|
||||||
|
|
||||||
|
# Health Check Endpoint URL: /api/v1/health/
|
||||||
|
path('api/v1/health/', SystemHealthCheck.as_view(), name='system-health'),
|
||||||
|
|
||||||
# 3. รวม Router API
|
# 3. รวม Router API
|
||||||
path('api/v1/', include(router.urls)),
|
path('api/v1/', include(router.urls)),
|
||||||
]
|
]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user