พัฒนา Backend (เพิ่ม Health Check Endpoint)

This commit is contained in:
Flook 2025-11-14 05:27:10 +07:00
parent 16dd285bab
commit a4b5dcf110
5 changed files with 228 additions and 0 deletions

View File

View File

@ -0,0 +1,186 @@
import os
import requests
import time
from django.db import connection, Error as DjangoDBError
from django.core.cache import cache
from django.conf import settings
import boto3
from botocore.client import Config
from botocore.exceptions import ClientError
from datetime import datetime, timezone
# นำเข้า Repository และ Service ของ Model Registry
from model_registry.repositories.ai_model_repository import AiModelRepository
from model_registry.models import AiModel
# Module-level repository instance used by the health checks in this file.
ai_model_repo = AiModelRepository()
# Base URL of the AI service; falls back to localhost for local development.
AI_SERVICE_URL = os.environ.get("AI_SERVICE_INTERNAL_URL", "http://localhost:8001")
# NOTE(review): the previous comment said this reuses the exception from
# model_registry, but the class is defined locally here — confirm intent.
class HealthCheckError(Exception):
    """Exception type for the health-check module; adds nothing to ``Exception``."""
class HealthService:
    """Run health probes against the backend's dependencies and aggregate them.

    Every ``_check_*`` probe returns a ``(status, details)`` tuple in which
    ``status`` is ``"UP"`` or ``"DOWN"``. ``get_system_health`` combines the
    probes into one report dictionary.
    """

    @staticmethod
    def _elapsed_ms(started):
        """Return the milliseconds elapsed since *started*, rounded to 2 places.

        *started* must be a ``time.perf_counter()`` reading. A monotonic clock
        is used so the latency cannot be skewed by wall-clock adjustments.
        """
        return round((time.perf_counter() - started) * 1000, 2)

    def _check_database(self):
        """Probe the database by executing ``SELECT 1`` on Django's connection.

        Returns:
            tuple: ``("UP", message including latency)`` on success, otherwise
            ``("DOWN", error message)``.
        """
        started = time.perf_counter()
        try:
            with connection.cursor() as cursor:
                cursor.execute("SELECT 1")
            latency = self._elapsed_ms(started)
            return "UP", f"Query executed successfully. Latency: {latency}ms"
        except DjangoDBError as e:
            return "DOWN", f"DB Connection Error: {e}"
        except Exception as e:
            return "DOWN", f"Unknown DB Error: {e}"

    def _check_cache(self):
        """Probe the cache by writing a short-lived key and reading it back.

        Returns:
            tuple: ``("UP", message including latency)`` when the value read
            back matches, otherwise ``("DOWN", reason)``.
        """
        started = time.perf_counter()
        test_key = 'health_test_key'
        try:
            cache.set(test_key, 'ok', timeout=1)
            result = cache.get(test_key)
            latency = self._elapsed_ms(started)
            if result == 'ok':
                return "UP", f"Read/Write successful. Latency: {latency}ms"
            return "DOWN", "Failed to retrieve test key."
        except Exception as e:
            return "DOWN", f"Redis Error: {e}"

    def _check_minio(self):
        """Probe the object storage (MinIO) through boto3's S3 client.

        Returns:
            tuple: ``("UP", message)`` when ``head_bucket`` succeeds for the
            configured bucket, otherwise ``("DOWN", reason)``.
        """
        # Resolved before the try block so the error handlers below can always
        # reference it, whichever statement failed.
        bucket_name = os.getenv("MODEL_BUCKET", "models")
        try:
            # 1. Build the S3 client.
            # NOTE(review): the fallback access key / secret are hard-coded
            # credentials; consider requiring them from the environment.
            s3_client = boto3.client(
                "s3",
                endpoint_url=os.getenv("MINIO_ENDPOINT", "http://localhost:9000"),
                aws_access_key_id=os.getenv("MINIO_ACCESS_KEY", "minio_admin"),
                aws_secret_access_key=os.getenv("MINIO_SECRET_KEY", "minio_p@ssw0rd!"),
                # Config carries the signature version and the timeouts.
                config=Config(signature_version="s3v4", connect_timeout=5, read_timeout=10),
            )
            # 2. head_bucket raises if the bucket is missing or inaccessible.
            s3_client.head_bucket(Bucket=bucket_name)
            return "UP", f"Bucket '{bucket_name}' accessible via boto3."
        except ClientError as e:
            # Defensive lookup: an exception raised inside this handler would
            # not be caught by the generic handler below.
            error_code = e.response.get("Error", {}).get("Code", "Unknown")
            if error_code == '404':
                return "DOWN", f"Bucket '{bucket_name}' not found."
            if error_code == '403':
                return "DOWN", "MinIO S3 Access Denied. Check Key/Secret."
            return "DOWN", f"MinIO S3 Error (Code {error_code}): {e}"
        except Exception as e:
            return "DOWN", f"MinIO Connection Error: {e}"

    def _check_ai_service(self):
        """Probe the AI service at ``AI_SERVICE_URL`` and read ``model_loaded``.

        Returns:
            tuple: ``("UP", message)`` when the service answers successfully
            and its JSON body has a truthy ``model_loaded``; otherwise
            ``("DOWN", reason)``.
        """
        started = time.perf_counter()
        try:
            response = requests.get(AI_SERVICE_URL, timeout=5)
            response.raise_for_status()
            data = response.json()
            model_loaded = data.get("model_loaded", False)
            latency = self._elapsed_ms(started)
            if model_loaded:
                return "UP", f"Service running & Model loaded. Latency: {latency}ms"
            return "DOWN", f"Service running, but Model load status: {model_loaded}. Latency: {latency}ms"
        except requests.exceptions.RequestException as e:
            return "DOWN", f"FastAPI/MONAI Service Unreachable: {e}"
        except Exception as e:
            return "DOWN", f"AI Service Check Failed: {e}"

    # -----------------------------------------------
    # Health checks for the individual AI models
    # -----------------------------------------------
    def _check_single_ai_model(self, model: "AiModel"):
        """Call the health endpoint of a single model.

        The endpoint is the model's full inference URL with ``/health``
        appended.

        Args:
            model: object exposing ``full_inference_url()``.

        Returns:
            tuple: ``("UP", message including latency)`` when the endpoint
            answers without an HTTP error, otherwise ``("DOWN", reason)``.
        """
        # Building the URL is guarded as well, so a broken registry entry is
        # reported as DOWN instead of raising out of the probe.
        try:
            health_url = model.full_inference_url().rstrip('/') + '/health'
        except Exception as e:
            return "DOWN", f"Check Failed: {e}"

        started = time.perf_counter()
        try:
            response = requests.get(health_url, timeout=5)
            response.raise_for_status()  # raises on 4xx/5xx responses
            latency = self._elapsed_ms(started)
            return "UP", f"Model Health Check successful. Latency: {latency}ms"
        except requests.exceptions.RequestException as e:
            return "DOWN", f"Health Check Failed at {health_url}: {e}"
        except Exception as e:
            return "DOWN", f"Check Failed: {e}"

    def _check_all_ai_models(self):
        """Probe every model whose status is ``ACTIVE``.

        Returns:
            tuple: ``(overall, entries)`` where ``overall`` is ``"UP"`` only if
            every entry is ``"UP"`` (also when there are no entries), and
            ``entries`` is a list of per-model dictionaries.
        """
        active_models = ai_model_repo.get_all().filter(status='ACTIVE')
        model_statuses = []
        for model in active_models:
            status, detail = self._check_single_ai_model(model)
            model_statuses.append({
                "name": f"{model.name} (v{model.model_version})",
                "id": str(model.id),  # sent as a string to avoid JS number truncation
                "status": status,
                "endpoint": model.full_inference_url(),
                "details": detail,
            })
        overall_model_health = "UP" if all(m['status'] == "UP" for m in model_statuses) else "DOWN"
        return overall_model_health, model_statuses

    def get_system_health(self):
        """Run every probe and return the combined report.

        Returns:
            dict: ``{"status": "Healthy" | "Degraded", "services": {...},
            "last_checked": <UTC ISO-8601 timestamp>}``. The status is
            ``"Healthy"`` only when every service entry is ``"UP"``.
        """
        results = {}

        # The model probe is the only one without its own catch-all. It is
        # guarded here so a failing registry lookup (presumably a database
        # query — confirm against the repository) is reported as DOWN and
        # does not abort the whole report.
        try:
            ai_overall_status, model_details = self._check_all_ai_models()
            results['model_endpoints'] = {
                "name": "AI Model Endpoints",
                "status": ai_overall_status,
                "models": model_details,
            }
        except Exception as e:
            results['model_endpoints'] = {
                "name": "AI Model Endpoints",
                "status": "DOWN",
                "models": [],
                "details": f"Model registry lookup failed: {e}",
            }

        # (result key, display name, probe) in the order they are reported.
        probes = (
            ("database", "CockroachDB", self._check_database),
            ("cache", "Redis Cache", self._check_cache),
            ("storage", "MinIO S3", self._check_minio),
            ("ai_service", "MONAI FastAPI", self._check_ai_service),
        )
        for key, label, probe in probes:
            status, details = probe()
            results[key] = {"name": label, "status": status, "details": details}

        all_up = all(entry["status"] == "UP" for entry in results.values())
        return {
            "status": "Healthy" if all_up else "Degraded",
            "services": results,
            "last_checked": datetime.now(timezone.utc).isoformat(),
        }

View File

View File

@ -0,0 +1,36 @@
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status, permissions
from datetime import datetime, timezone
# Import Service Layer
from api.services.health_service import HealthService
# Dependency injection: module-level HealthService instance used by the view.
health_service = HealthService()
class SystemHealthCheck(APIView):
    """
    GET /api/v1/health/

    Controller that returns the system health-check report.

    Responds with 200 when the report's status is "Healthy", with 503 for any
    other status, and with 500 if producing the response raises.
    """
    permission_classes = [permissions.AllowAny]

    def get(self, request):
        """Build the health report via the service layer and map it to an HTTP status."""
        try:
            report = health_service.get_system_health()
            # Anything other than "Healthy" is reported as service unavailable.
            code = (
                status.HTTP_200_OK
                if report['status'] == "Healthy"
                else status.HTTP_503_SERVICE_UNAVAILABLE
            )
            return Response(report, status=code)
        except Exception as e:
            # Unexpected failure at the view level.
            error_body = {
                "status": "Error",
                "detail": f"Internal Server Error during health check: {e}",
            }
            return Response(error_body, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

View File

@ -21,6 +21,9 @@ from rest_framework.routers import DefaultRouter
from model_registry.views.ai_model_viewset import AiModelRegistryViewSet
from drf_spectacular.views import SpectacularAPIView, SpectacularSwaggerView, SpectacularRedocView
# Import Health Check View ในแอพ /api
from api.views.health_check_view import SystemHealthCheck
# 1. Define the router variable before it is used.
router = DefaultRouter()
@ -49,6 +52,9 @@ urlpatterns = [
path('api/v1/auth/', include('djoser.urls')), # /users/ (Register/Update/Me), /users/set_password
path('api/v1/auth/', include('djoser.urls.jwt')), # /jwt/create (Login), /jwt/refresh (Refresh Token)
# Health Check Endpoint URL: /api/v1/health/
path('api/v1/health/', SystemHealthCheck.as_view(), name='system-health'),
# 3. รวม Router API
path('api/v1/', include(router.urls)),
]