th-air-quality-etl-ml/pipelines/transform_clean.py

69 lines
2.2 KiB
Python

from pydantic import BaseModel
from datetime import datetime
class AirQualitySchema(BaseModel):
    """Pydantic model for a single station's air-quality record.

    ``transform_json`` in this module builds one instance per station entry
    of the raw payload. There, pollutant values and ``aqi`` fall back to
    ``-1`` and the coordinates fall back to ``0`` when the corresponding key
    is absent from the payload.
    """

    # Station identity and descriptive metadata.
    station_id: str
    station_nameTH: str  # presumably the Thai-language name -- confirm against the feed
    station_nameEN: str  # presumably the English-language name -- confirm against the feed
    areaTH: str
    areaEN: str
    station_type: str

    # Station coordinates.
    latitude: float
    longitude: float

    # Latest pollutant readings (units are not stated in this file -- TODO confirm).
    pm25: float
    pm10: float
    o3: float
    co: float
    no2: float
    so2: float

    # Overall index and the pollutant reported alongside it.
    aqi: int
    main_pollutant: str

    # Timestamp parsed from the payload's date and time strings (naive datetime).
    record_time: datetime

    class Config:
        from_attributes = True  # allow the model to be built from an ORM object
def _to_number(raw, cast, default):
    """Return ``cast(raw)``, or *default* when *raw* is missing or unparseable."""
    if raw is None:
        return default
    try:
        return cast(raw)
    except (TypeError, ValueError):
        # Placeholder readings such as "-" or "N/A" land here.
        return default


def _station_to_schema(station: dict) -> "AirQualitySchema":
    """Build one ``AirQualitySchema`` from a single raw station entry."""
    # ``or {}`` also covers an explicit JSON null, which ``.get(key, {})`` would not.
    latest = station.get('AQILast') or {}

    def reading(name):
        return latest.get(name) or {}

    def pollutant(name):
        return _to_number(reading(name).get('value'), float, -1.0)

    index = reading('AQI')

    # Prepare the data so that it matches AirQualitySchema.
    return AirQualitySchema(
        station_id=station.get('stationID'),
        station_nameTH=station.get('nameTH'),
        station_nameEN=station.get('nameEN'),
        areaTH=station.get('areaTH'),
        areaEN=station.get('areaEN'),
        station_type=station.get('stationType'),
        latitude=_to_number(station.get('lat'), float, 0.0),
        longitude=_to_number(station.get('long'), float, 0.0),
        pm25=pollutant('PM25'),
        pm10=pollutant('PM10'),
        o3=pollutant('O3'),
        co=pollutant('CO'),
        no2=pollutant('NO2'),
        so2=pollutant('SO2'),
        aqi=_to_number(index.get('aqi'), int, -1),
        main_pollutant=index.get('param') or '',
        record_time=datetime.strptime(
            f"{latest.get('date')} {latest.get('time')}", "%Y-%m-%d %H:%M"
        ),
    )


def transform_json(raw_data: dict):
    """Convert a raw air-quality payload into a list of ``AirQualitySchema``.

    Args:
        raw_data: Decoded JSON payload. Stations are read from its
            ``'stations'`` key; a missing or null value is treated as
            "no stations".

    Returns:
        A list with one ``AirQualitySchema`` per station that could be
        converted (possibly empty), or ``None`` when the payload itself
        cannot be processed (for example, it is not a mapping).

    Notes:
        Missing or non-numeric pollutant values and ``aqi`` become ``-1``;
        missing or non-numeric coordinates become ``0``. A station that
        still cannot be converted (for example, an unparseable date/time or
        a failed schema validation) is reported and skipped, so one bad
        entry no longer discards the whole batch. This function never
        raises; problems are printed.
    """
    try:
        stations = raw_data.get('stations') or []
        transformed = []
        for station in stations:
            try:
                transformed.append(_station_to_schema(station))
            except (AttributeError, TypeError, ValueError) as e:
                # pydantic's ValidationError subclasses ValueError, so schema
                # failures are isolated to the offending station as well.
                station_id = station.get('stationID') if isinstance(station, dict) else None
                print(f"Skipping station {station_id!r}: {e}")
        return transformed
    except Exception as e:
        # Best-effort boundary kept from the original: report and signal
        # failure with None instead of propagating.
        print(f"Error transforming data: {e}")
        return None