พัฒนา Backend (เพิ่ม Health Check Endpoint)
This commit is contained in:
parent
16dd285bab
commit
a4b5dcf110
0
backend/api/services/__init__.py
Normal file
0
backend/api/services/__init__.py
Normal file
186
backend/api/services/health_service.py
Normal file
186
backend/api/services/health_service.py
Normal file
@ -0,0 +1,186 @@
|
||||
import os
|
||||
import requests
|
||||
import time
|
||||
from django.db import connection, Error as DjangoDBError
|
||||
from django.core.cache import cache
|
||||
from django.conf import settings
|
||||
import boto3
|
||||
from botocore.client import Config
|
||||
from botocore.exceptions import ClientError
|
||||
from datetime import datetime, timezone
|
||||
|
||||
# นำเข้า Repository และ Service ของ Model Registry
|
||||
from model_registry.repositories.ai_model_repository import AiModelRepository
|
||||
from model_registry.models import AiModel
|
||||
|
||||
# สร้าง Instance ของ Repository และ Service (ถ้ายังไม่มีในไฟล์นี้)
|
||||
# Shared repository instance; HealthService uses it to look up the registered AI models.
ai_model_repo = AiModelRepository()
|
||||
|
||||
# กำหนด URL ของ AI Service (ใช้ localhost สำหรับ Local Dev)
|
||||
# Base URL of the AI service. Set AI_SERVICE_INTERNAL_URL to override it;
# the fallback is the address used for local development.
AI_SERVICE_URL = os.environ.get("AI_SERVICE_INTERNAL_URL", "http://localhost:8001")
|
||||
|
||||
# Exception type for health-check failures, defined locally in this module (not imported from model_registry).
|
||||
class HealthCheckError(Exception):
    """Signal that a health check could not be completed.

    NOTE(review): nothing in the visible code raises or catches this type;
    the individual checks report failures through their return values.
    Confirm whether it is still needed.
    """
|
||||
|
||||
|
||||
class HealthService:
    """Run the dependency health checks and merge them into one report.

    Every ``_check_*`` method returns its outcome instead of raising, so a
    single broken dependency cannot stop the remaining ones from being
    reported. Simple checks return a ``(status, details)`` tuple in which
    ``status`` is ``"UP"`` or ``"DOWN"`` and ``details`` is a human-readable
    message.
    """

    @staticmethod
    def _elapsed_ms(started_at):
        """Return the milliseconds elapsed since ``started_at``.

        Args:
            started_at: An earlier ``time.perf_counter()`` reading.

        Returns:
            The elapsed time in milliseconds, rounded to two decimals.
        """
        # perf_counter() is monotonic, so the measurement cannot go negative
        # or jump when the system clock is adjusted.
        return round((time.perf_counter() - started_at) * 1000, 2)

    def _check_database(self):
        """Check the database by executing ``SELECT 1``.

        Returns:
            ``("UP", message)`` including the measured latency, or
            ``("DOWN", message)`` describing the error that occurred.
        """
        started_at = time.perf_counter()
        try:
            with connection.cursor() as cursor:
                cursor.execute("SELECT 1")
                latency = self._elapsed_ms(started_at)
            return "UP", f"Query executed successfully. Latency: {latency}ms"
        except DjangoDBError as e:
            return "DOWN", f"DB Connection Error: {e}"
        except Exception as e:
            return "DOWN", f"Unknown DB Error: {e}"

    def _check_cache(self):
        """Check the cache by writing a short-lived key and reading it back.

        Returns:
            ``("UP", message)`` when the value round-trips, otherwise
            ``("DOWN", message)``.
        """
        test_key = 'health_test_key'
        started_at = time.perf_counter()
        try:
            # timeout=1 lets the probe key expire on its own shortly after.
            cache.set(test_key, 'ok', timeout=1)
            result = cache.get(test_key)
        except Exception as e:
            return "DOWN", f"Redis Error: {e}"

        latency = self._elapsed_ms(started_at)
        if result == 'ok':
            return "UP", f"Read/Write successful. Latency: {latency}ms"
        return "DOWN", "Failed to retrieve test key."

    def _check_minio(self):
        """Check object storage (MinIO) by issuing ``head_bucket`` via boto3.

        Returns:
            ``("UP", message)`` when the bucket is reachable, otherwise
            ``("DOWN", message)`` distinguishing a missing bucket (404),
            denied access (403), other S3 errors and connection failures.
        """
        # Resolved before the try block so that every error handler below can
        # reference it without risking an UnboundLocalError.
        bucket_name = os.getenv("MODEL_BUCKET", "models")
        try:
            # NOTE(review): the fallback access key and secret are hard-coded
            # defaults; confirm they are development-only values and that
            # deployments always provide MINIO_ACCESS_KEY / MINIO_SECRET_KEY.
            s3_client = boto3.client(
                "s3",
                endpoint_url=os.getenv("MINIO_ENDPOINT", "http://localhost:9000"),
                aws_access_key_id=os.getenv("MINIO_ACCESS_KEY", "minio_admin"),
                aws_secret_access_key=os.getenv("MINIO_SECRET_KEY", "minio_p@ssw0rd!"),
                # Explicit timeouts keep an unreachable endpoint from
                # stalling the health check.
                config=Config(signature_version="s3v4", connect_timeout=5, read_timeout=10),
            )
            # head_bucket succeeds without a body when the bucket exists and
            # is accessible; otherwise it raises ClientError.
            s3_client.head_bucket(Bucket=bucket_name)
        except ClientError as e:
            # Read the code defensively: an exception raised inside this
            # handler would not be caught by the generic handler below.
            error_code = e.response.get('Error', {}).get('Code', 'Unknown')
            if error_code == '404':
                return "DOWN", f"Bucket '{bucket_name}' not found."
            if error_code == '403':
                return "DOWN", "MinIO S3 Access Denied. Check Key/Secret."
            return "DOWN", f"MinIO S3 Error (Code {error_code}): {e}"
        except Exception as e:
            return "DOWN", f"MinIO Connection Error: {e}"

        return "UP", f"Bucket '{bucket_name}' accessible via boto3."

    def _check_ai_service(self):
        """Check the AI service by requesting ``AI_SERVICE_URL``.

        The service counts as ``"UP"`` only when the request succeeds and the
        JSON body carries a truthy ``model_loaded`` field.

        Returns:
            ``("UP", message)`` or ``("DOWN", message)``.
        """
        started_at = time.perf_counter()
        try:
            response = requests.get(AI_SERVICE_URL, timeout=5)
            response.raise_for_status()

            data = response.json()
            model_loaded = data.get("model_loaded", False)
            latency = self._elapsed_ms(started_at)

            if model_loaded:
                return "UP", f"Service running & Model loaded. Latency: {latency}ms"
            return "DOWN", f"Service running, but Model load status: {model_loaded}. Latency: {latency}ms"
        except requests.exceptions.RequestException as e:
            return "DOWN", f"FastAPI/MONAI Service Unreachable: {e}"
        except Exception as e:
            # Covers a non-JSON body or a JSON payload that is not an object.
            return "DOWN", f"AI Service Check Failed: {e}"

    # -----------------------------------------------
    # Health checks for the individual AI models
    # -----------------------------------------------
    def _check_single_ai_model(self, model: "AiModel"):
        """Call the ``/health`` endpoint of one model.

        Args:
            model: The model to probe; its ``full_inference_url()`` with
                ``/health`` appended is the URL that gets requested.

        Returns:
            ``("UP", message)`` when the endpoint answers with a success
            status, otherwise ``("DOWN", message)``.
        """
        health_url = model.full_inference_url().rstrip('/') + '/health'
        started_at = time.perf_counter()
        try:
            response = requests.get(health_url, timeout=5)
            response.raise_for_status()
            latency = self._elapsed_ms(started_at)
            return "UP", f"Model Health Check successful. Latency: {latency}ms"
        except requests.exceptions.RequestException as e:
            return "DOWN", f"Health Check Failed at {health_url}: {e}"
        except Exception as e:
            return "DOWN", f"Check Failed: {e}"

    def _check_all_ai_models(self):
        """Probe every model whose status is ``ACTIVE``.

        Returns:
            ``(overall_status, model_statuses)``. ``model_statuses`` holds one
            dict per model with the keys ``name``, ``id``, ``status``,
            ``endpoint`` and ``details``. ``overall_status`` is ``"UP"`` only
            if every entry is ``"UP"``; with no active models it is ``"UP"``
            as well. If the model list itself cannot be loaded, the result is
            ``"DOWN"`` with a single entry describing the failure.
        """
        try:
            # list() forces the query to run here, inside the try block, so a
            # database outage is reported as DOWN instead of escaping and
            # turning the whole health endpoint into an error response.
            active_models = list(ai_model_repo.get_all().filter(status='ACTIVE'))
        except Exception as e:
            return "DOWN", [{
                "name": "Model Registry",
                "id": None,
                "status": "DOWN",
                "endpoint": None,
                "details": f"Could not load active models: {e}",
            }]

        model_statuses = []
        for model in active_models:
            status, detail = self._check_single_ai_model(model)
            model_statuses.append({
                "name": f"{model.name} (v{model.model_version})",
                # Sent as a string so large ids are not truncated by
                # JavaScript clients.
                "id": str(model.id),
                "status": status,
                "endpoint": model.full_inference_url(),
                "details": detail,
            })

        all_up = all(entry['status'] == "UP" for entry in model_statuses)
        return ("UP" if all_up else "DOWN"), model_statuses

    def get_system_health(self):
        """Run every check and return the combined report.

        Returns:
            A dict with ``status`` (``"Healthy"`` when every check is
            ``"UP"``, otherwise ``"Degraded"``), ``services`` (one entry per
            check, keyed ``model_endpoints``, ``database``, ``cache``,
            ``storage`` and ``ai_service``) and ``last_checked`` (UTC
            timestamp in ISO 8601 format).
        """
        results = {}

        ai_overall_status, model_details = self._check_all_ai_models()
        results['model_endpoints'] = {
            "name": "AI Model Endpoints",
            "status": ai_overall_status,
            "models": model_details,
        }

        # (result key, display name, check) in the order they are reported.
        checks = (
            ('database', "CockroachDB", self._check_database),
            ('cache', "Redis Cache", self._check_cache),
            ('storage', "MinIO S3", self._check_minio),
            ('ai_service', "MONAI FastAPI", self._check_ai_service),
        )
        for key, display_name, run_check in checks:
            status, details = run_check()
            results[key] = {"name": display_name, "status": status, "details": details}

        everything_up = all(entry["status"] == "UP" for entry in results.values())
        return {
            "status": "Healthy" if everything_up else "Degraded",
            "services": results,
            "last_checked": datetime.now(timezone.utc).isoformat(),
        }
|
||||
0
backend/api/views/__init__.py
Normal file
0
backend/api/views/__init__.py
Normal file
36
backend/api/views/health_check_view.py
Normal file
36
backend/api/views/health_check_view.py
Normal file
@ -0,0 +1,36 @@
|
||||
from rest_framework.views import APIView
|
||||
from rest_framework.response import Response
|
||||
from rest_framework import status, permissions
|
||||
from datetime import datetime, timezone
|
||||
|
||||
# Import Service Layer
|
||||
from api.services.health_service import HealthService
|
||||
|
||||
# Dependency Injection: สร้าง Instance ของ Service
|
||||
# Module-level instance created once at import time and reused by every
# request that SystemHealthCheck handles (it is not injected per request).
health_service = HealthService()
|
||||
|
||||
class SystemHealthCheck(APIView):
    """
    GET /api/v1/health/

    Controller that returns the system health report.

    Responds with 200 when the report status is "Healthy", 503 for any other
    report status, and 500 if producing the report raised unexpectedly.
    """
    # The endpoint is open to unauthenticated callers.
    permission_classes = [permissions.AllowAny]

    def get(self, request):
        """Return the health report produced by the service layer.

        Args:
            request: The incoming DRF request (not inspected).

        Returns:
            A ``Response`` carrying the report, or an error payload with
            status 500 when an unexpected exception occurred.
        """
        try:
            response_data = health_service.get_system_health()

            # Map the overall status to an HTTP status code.
            if response_data['status'] == "Healthy":
                http_status = status.HTTP_200_OK
            else:
                http_status = status.HTTP_503_SERVICE_UNAVAILABLE

            return Response(response_data, status=http_status)

        except Exception as e:
            # Record the traceback; previously the exception was only echoed
            # into the response body and never logged.
            import logging
            logging.getLogger(__name__).exception(
                "Unexpected error while building the health report"
            )
            # NOTE(review): the exception text is returned to unauthenticated
            # callers; consider replacing it with a generic message.
            return Response(
                {"status": "Error", "detail": f"Internal Server Error during health check: {e}"},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            )
|
||||
@ -21,6 +21,9 @@ from rest_framework.routers import DefaultRouter
|
||||
from model_registry.views.ai_model_viewset import AiModelRegistryViewSet
|
||||
from drf_spectacular.views import SpectacularAPIView, SpectacularSwaggerView, SpectacularRedocView
|
||||
|
||||
# Import Health Check View ในแอพ /api
|
||||
from api.views.health_check_view import SystemHealthCheck
|
||||
|
||||
# 1. กำหนดตัวแปร router ก่อนใช้งาน
|
||||
# DRF router whose generated URLs are mounted under api/v1/ in urlpatterns
# (route registrations are not visible in this excerpt).
router = DefaultRouter()
|
||||
|
||||
@ -49,6 +52,9 @@ urlpatterns = [
|
||||
path('api/v1/auth/', include('djoser.urls')), # /users/ (Register/Update/Me), /users/set_password
|
||||
path('api/v1/auth/', include('djoser.urls.jwt')), # /jwt/create (Login), /jwt/refresh (Refresh Token)
|
||||
|
||||
# Health Check Endpoint URL: /api/v1/health/
|
||||
path('api/v1/health/', SystemHealthCheck.as_view(), name='system-health'),
|
||||
|
||||
# 3. รวม Router API
|
||||
path('api/v1/', include(router.urls)),
|
||||
]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user