commit 15c2dc95f62bfd881cf9494e8c42082194c29bce
Author: rudals252
Date:   Thu Feb 19 17:21:51 2026 +0900

    Initial commit: MLflow dashboard project

    Co-Authored-By: Claude Opus 4.6

diff --git a/.claude/settings.local.json b/.claude/settings.local.json
new file mode 100644
index 0000000..2e94973
--- /dev/null
+++ b/.claude/settings.local.json
@@ -0,0 +1,9 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(git init:*)",
+      "Bash(git add:*)",
+      "Bash(git config --global --add safe.directory '%\\(prefix\\)///192.168.200.231/fermat/work/mlflow_docker/dashboard')"
+    ]
+  }
+}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b49566a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,13 @@
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.env
+.venv/
+venv/
+*.egg-info/
+dist/
+build/
+.idea/
+.vscode/
+*.log
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..0c98000
--- /dev/null
+++ b/config.py
@@ -0,0 +1 @@
+DEFAULT_TRACKING_URI = "http://192.168.200.231:50111"
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..bbd5fc9
--- /dev/null
+++ b/main.py
@@ -0,0 +1,37 @@
+"""FastAPI entry point for the MLflow dashboard: REST routers plus the static UI."""
+
+from pathlib import Path
+
+from fastapi import FastAPI
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
+from routers import experiments, runs, serve, train
+
+# Resolve the UI assets relative to this file rather than the process working
+# directory, so the app also starts when launched from another directory.
+STATIC_DIR = Path(__file__).resolve().parent / "static"
+
+app = FastAPI(
+    title="MLflow Dashboard API",
+    description="MLflow Experiment/Run 조회 및 Train/Serve 관리",
+    version="0.1.0",
+)
+
+app.include_router(experiments.router, prefix="/api", tags=["Experiments"])
+app.include_router(runs.router, prefix="/api", tags=["Runs"])
+app.include_router(serve.router, prefix="/api", tags=["Serve"])
+app.include_router(train.router, prefix="/api", tags=["Train"])
+
+app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
+
+
+@app.get("/")
+def index():
+    """Serve the single-page dashboard UI."""
+    return FileResponse(str(STATIC_DIR / "index.html"))
+
+
+@app.get("/health")
+def health():
+    """Liveness probe; reports ``ok`` whenever the process can answer."""
+    return {"status": "ok"}
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c8b1a76
---
/dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+fastapi>=0.115.0
+uvicorn>=0.34.0
+mlflow>=3.8.0
diff --git a/routers/__init__.py b/routers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/routers/experiments.py b/routers/experiments.py
new file mode 100644
index 0000000..3e2e2ec
--- /dev/null
+++ b/routers/experiments.py
@@ -0,0 +1,39 @@
+"""Read-only endpoints for listing MLflow experiments and their runs."""
+
+from typing import List, Optional
+from fastapi import APIRouter, HTTPException, Query
+from mlflow.exceptions import MlflowException
+from schemas import ExperimentSummary, RunSummary
+from services import mlflow_service
+
+router = APIRouter()
+
+
+def _http_error(exc):
+    # type: (MlflowException) -> HTTPException
+    """Translate an MLflow failure into the HTTP status MLflow assigns to it."""
+    return HTTPException(status_code=exc.get_http_status_code(), detail=exc.message)
+
+
+@router.get("/experiments", response_model=List[ExperimentSummary])
+def list_experiments(
+    tracking_uri: Optional[str] = Query(None, description="MLflow Tracking URI"),
+):
+    """List the experiments of the given (or default) tracking server."""
+    try:
+        return mlflow_service.get_experiments(tracking_uri)
+    except MlflowException as exc:
+        # Without this, any MLflow error surfaces as an unhandled bare 500.
+        raise _http_error(exc) from exc
+
+
+@router.get("/experiments/{exp_id}/runs", response_model=List[RunSummary])
+def list_runs(
+    exp_id: str,
+    tracking_uri: Optional[str] = Query(None, description="MLflow Tracking URI"),
+):
+    """List the runs of experiment ``exp_id``."""
+    try:
+        return mlflow_service.get_runs(tracking_uri, exp_id)
+    except MlflowException as exc:
+        raise _http_error(exc) from exc
diff --git a/routers/runs.py b/routers/runs.py
new file mode 100644
index 0000000..045ea05
--- /dev/null
+++ b/routers/runs.py
@@ -0,0 +1,41 @@
+"""Endpoints for a single MLflow run: its details and a link to the MLflow UI."""
+
+from fastapi import APIRouter, HTTPException, Query
+from typing import Optional
+from mlflow.exceptions import MlflowException
+from schemas import RunDetail, MLflowLink
+from services import mlflow_service
+
+router = APIRouter()
+
+
+def _http_error(exc):
+    # type: (MlflowException) -> HTTPException
+    """Translate an MLflow failure into the HTTP status MLflow assigns to it."""
+    return HTTPException(status_code=exc.get_http_status_code(), detail=exc.message)
+
+
+@router.get("/runs/{run_id}", response_model=RunDetail)
+def get_run(
+    run_id: str,
+    tracking_uri: Optional[str] = Query(None, description="MLflow Tracking URI"),
+):
+    """Return the details of run ``run_id``."""
+    try:
+        return mlflow_service.get_run_detail(tracking_uri, run_id)
+    except MlflowException as exc:
+        # e.g. an unknown run ID becomes a 404 instead of an unhandled 500.
+        raise _http_error(exc) from exc
+
+
+@router.get("/runs/{run_id}/mlflow-link", response_model=MLflowLink)
+def get_mlflow_link(
+    run_id: str,
+    tracking_uri: Optional[str] = Query(None, description="MLflow Tracking URI"),
+):
+    """Return the MLflow UI URL of run ``run_id`` as ``{"url": ...}``."""
+    try:
+        url = mlflow_service.get_mlflow_link(tracking_uri, run_id)
+    except MlflowException as exc:
+        raise _http_error(exc) from exc
+    return {"url": url}
diff --git a/routers/serve.py b/routers/serve.py
new file mode 100644
index 0000000..1799471
--- /dev/null
+++ b/routers/serve.py
@@ -0,0
+1,114 @@
+"""Endpoints that start, list and stop local ``mlflow models serve`` processes."""
+
+import os
+import subprocess
+import uuid
+from typing import Dict, List
+from fastapi import APIRouter, HTTPException
+from schemas import ServeRequest, ServeStatus
+
+router = APIRouter()
+
+# In-memory store for serving processes
+_serving_processes = {}  # type: Dict[str, Dict]
+
+# Seconds to wait for a terminated server to exit before killing it.
+_STOP_TIMEOUT = 10
+
+
+@router.post("/serve", response_model=ServeStatus)
+def start_serve(req: ServeRequest):
+    """Launch ``mlflow models serve`` for ``req.model_uri`` on ``req.port``.
+
+    Returns the new entry's ``ServeStatus``; the process is tracked in memory
+    under a random 8-character ID.
+
+    Raises:
+        HTTPException: 500 if the ``mlflow`` executable is not found.
+    """
+    serve_id = uuid.uuid4().hex[:8]
+    cmd = [
+        "mlflow", "models", "serve",
+        "-m", req.model_uri,
+        "-p", str(req.port),
+        # ``--env-manager local`` supersedes the deprecated ``--no-conda`` flag
+        # (rejected by MLflow releases that dropped it) and is needed whether
+        # or not a tracking URI was supplied.
+        "--env-manager", "local",
+    ]
+
+    env = os.environ.copy()
+    if req.tracking_uri:
+        # Hand the tracking server to the child process; the request's
+        # tracking_uri was previously accepted but never forwarded.
+        env["MLFLOW_TRACKING_URI"] = req.tracking_uri
+
+    try:
+        # Nobody reads the child's output, so it must not go to a PIPE: once the
+        # OS pipe buffer fills, the server would block on its next write.
+        proc = subprocess.Popen(
+            cmd,
+            env=env,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+    except FileNotFoundError as exc:
+        raise HTTPException(status_code=500, detail="mlflow CLI not found") from exc
+
+    _serving_processes[serve_id] = {
+        "id": serve_id,
+        "model_uri": req.model_uri,
+        "port": req.port,
+        "pid": proc.pid,
+        "process": proc,
+    }
+
+    return ServeStatus(
+        id=serve_id,
+        model_uri=req.model_uri,
+        port=req.port,
+        pid=proc.pid,
+        status="running",
+    )
+
+
+@router.get("/serve", response_model=List[ServeStatus])
+def list_serve():
+    """Report every tracked serve process with its running/stopped state."""
+    # Iterate over a snapshot: sync endpoints run in worker threads, and a
+    # concurrent start/stop would otherwise resize the dict mid-iteration.
+    return [
+        ServeStatus(
+            id=sid,
+            model_uri=info["model_uri"],
+            port=info["port"],
+            pid=info["pid"],
+            status="running" if info["process"].poll() is None else "stopped",
+        )
+        for sid, info in list(_serving_processes.items())
+    ]
+
+
+@router.delete("/serve/{serve_id}")
+def stop_serve(serve_id: str):
+    """Terminate the serve process ``serve_id`` and stop tracking it.
+
+    Raises:
+        HTTPException: 404 if ``serve_id`` is not tracked.
+    """
+    info = _serving_processes.pop(serve_id, None)
+    if info is None:
+        raise HTTPException(status_code=404, detail="Serve process not found")
+
+    proc = info["process"]
+    if proc.poll() is None:
+        proc.terminate()
+        try:
+            # Reap the child so it does not linger as a zombie.
+            proc.wait(timeout=_STOP_TIMEOUT)
+        except subprocess.TimeoutExpired:
+            proc.kill()
+            proc.wait()
+
+    model_uri = info["model_uri"]
+    return {"message": f"Stopped serving {model_uri}"}
diff --git a/routers/train.py b/routers/train.py
new file mode 100644
index 0000000..59034a6
--- /dev/null
+++ b/routers/train.py
@@ -0,0 +1,11 @@
+from fastapi import APIRouter,
HTTPException
+
+router = APIRouter()
+
+
+@router.post("/train")
+def start_train():
+    raise HTTPException(
+        status_code=501,
+        detail="Train trigger is not implemented yet. Will be connected to training server.",
+    )
diff --git a/schemas.py b/schemas.py
new file mode 100644
index 0000000..6c26bc9
--- /dev/null
+++ b/schemas.py
@@ -0,0 +1,56 @@
+"""Pydantic request/response models for the dashboard API."""
+
+from pydantic import BaseModel
+from typing import Dict, Optional
+
+
+class ExperimentSummary(BaseModel):
+    """One experiment together with the number of runs counted for it."""
+    experiment_id: str
+    name: str
+    lifecycle_stage: str
+    run_count: int
+
+
+class RunSummary(BaseModel):
+    """Identifying, status and timing fields of a run, as used in run lists."""
+    run_id: str
+    run_name: Optional[str] = None
+    experiment_id: str
+    status: str
+    start_time: Optional[int] = None
+    end_time: Optional[int] = None
+
+
+class RunDetail(BaseModel):
+    """The summary fields of a run plus its params, metrics and tags."""
+    run_id: str
+    run_name: Optional[str] = None
+    experiment_id: str
+    status: str
+    start_time: Optional[int] = None
+    end_time: Optional[int] = None
+    params: Dict[str, str] = {}
+    metrics: Dict[str, float] = {}
+    tags: Dict[str, str] = {}
+
+
+class MLflowLink(BaseModel):
+    """URL of a run's page in the MLflow UI."""
+    url: str
+
+
+class ServeRequest(BaseModel):
+    """Parameters for launching a model server."""
+    tracking_uri: Optional[str] = None
+    model_uri: str  # e.g. "models:/model_name/1"
+    port: int = 5001
+
+
+class ServeStatus(BaseModel):
+    """State of one tracked serve process."""
+    id: str
+    model_uri: str
+    port: int
+    pid: int
+    status: str  # "running" or "stopped"
diff --git a/services/__init__.py b/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/services/mlflow_service.py b/services/mlflow_service.py
new file mode 100644
index 0000000..ac3001e
--- /dev/null
+++ b/services/mlflow_service.py
@@ -0,0 +1,113 @@
+"""Thin wrappers around ``MlflowClient`` that return plain dicts for the API layer."""
+
+from typing import Dict, List, Optional
+from mlflow.tracking import MlflowClient
+from mlflow.entities import ViewType
+from config import DEFAULT_TRACKING_URI
+
+
+def _tracking_uri(tracking_uri=None):
+    # type: (Optional[str]) -> str
+    """Return ``tracking_uri``, falling back to the configured default."""
+    return tracking_uri or DEFAULT_TRACKING_URI
+
+
+def _client(tracking_uri=None):
+    # type: (Optional[str]) -> MlflowClient
+    """Build a client for ``tracking_uri`` (the default server when omitted)."""
+    return MlflowClient(tracking_uri=_tracking_uri(tracking_uri))
+
+
+def _count_runs(client, experiment_id):
+    # type: (MlflowClient, str) -> int
+    """Count an experiment's runs across all result pages.
+
+    A single ``search_runs`` call returns at most one page of results, so
+    taking ``len()`` of it caps the count at the page size.
+    """
+    total = 0
+    page_token = None
+    while True:
+        page = client.search_runs(
+            experiment_ids=[experiment_id],
+            page_token=page_token,
+        )
+        total += len(page)
+        page_token = page.token
+        if not page_token:
+            return total
+
+
+def get_experiments(tracking_uri=None):
+    # type: (Optional[str]) -> List[Dict]
+    """Return the active experiments, each with its run count."""
+    client = _client(tracking_uri)
+    experiments = client.search_experiments(view_type=ViewType.ACTIVE_ONLY)
+    return [
+        {
+            "experiment_id": exp.experiment_id,
+            "name": exp.name,
+            "lifecycle_stage": exp.lifecycle_stage,
+            "run_count": _count_runs(client, exp.experiment_id),
+        }
+        for exp in experiments
+    ]
+
+
+def get_runs(tracking_uri=None, experiment_id="0"):
+    # type: (Optional[str], str) -> List[Dict]
+    """Return summaries of an experiment's active runs, newest start time first.
+
+    Only the results of a single ``search_runs`` call are included.
+    """
+    client = _client(tracking_uri)
+    runs = client.search_runs(
+        experiment_ids=[experiment_id],
+        run_view_type=ViewType.ACTIVE_ONLY,
+        order_by=["attributes.start_time DESC"],
+    )
+    return [
+        {
+            "run_id": r.info.run_id,
+            "run_name": r.info.run_name,
+            "experiment_id": r.info.experiment_id,
+            "status": r.info.status,
+            "start_time": r.info.start_time,
+            "end_time": r.info.end_time,
+        }
+        for r in runs
+    ]
+
+
+def get_run_detail(tracking_uri=None, run_id=""):
+    # type: (Optional[str], str) -> Dict
+    """Return a run's info fields together with its params, metrics and tags."""
+    client = _client(tracking_uri)
+    run = client.get_run(run_id)
+    return {
+        "run_id": run.info.run_id,
+        "run_name": run.info.run_name,
+        "experiment_id": run.info.experiment_id,
+        "status": run.info.status,
+        "start_time": run.info.start_time,
+        "end_time": run.info.end_time,
+        "params": dict(run.data.params or {}),
+        "metrics": dict(run.data.metrics or {}),
+        "tags": dict(run.data.tags or {}),
+    }
+
+
+def get_mlflow_link(tracking_uri=None, run_id=""):
+    # type: (Optional[str], str) -> str
+    """Return the MLflow UI URL of a run, built on top of the tracking URI.
+
+    The run is fetched first so that the link carries its experiment ID.
+    """
+    client = _client(tracking_uri)
+    run = client.get_run(run_id)
+    base_url = _tracking_uri(tracking_uri).rstrip("/")
+    return "{base}/#/experiments/{exp}/runs/{run}".format(
+        base=base_url,
+        exp=run.info.experiment_id,
+        run=run.info.run_id,
+    )
diff --git a/static/app.js b/static/app.js
new file mode 100644
index 0000000..05e0117
--- /dev/null
+++ b/static/app.js
@@ -0,0 +1,118 @@
+const API_BASE = window.location.origin;
+
+function getTrackingUri() {
+    return document.getElementById('trackingUri').value.trim() || null;
+}
+
+function formatTime(ts) {
+    if (!ts) return '';
+    const d = new Date(ts);
+    return d.toLocaleDateString('ko-KR') + ' ' + d.toLocaleTimeString('ko-KR', {hour: '2-digit', minute: '2-digit'});
+}
+
+async function loadExperiments() {
+    const container = document.getElementById('content');
+    container.innerHTML = '
Loading...
'; + + const uri = getTrackingUri(); + const params = uri ? '?tracking_uri=' + encodeURIComponent(uri) : ''; + + try { + const res = await fetch(API_BASE + '/api/experiments' + params); + if (!res.ok) throw new Error('Failed: ' + res.status); + const experiments = await res.json(); + + if (experiments.length === 0) { + container.innerHTML = '
No experiments found.
'; + return; + } + + container.innerHTML = ''; + experiments.forEach(function(exp) { + const card = document.createElement('div'); + card.className = 'exp-card'; + card.innerHTML = + '
' + + '' + + '' + exp.name + '' + + '' + exp.run_count + ' runs' + + '
' + + '
' + + '
Loading runs...
' + + '
'; + container.appendChild(card); + }); + } catch (e) { + container.innerHTML = '
Connection failed: ' + e.message + '
'; + } +} + +async function toggleExp(header, expId) { + const arrow = header.querySelector('.exp-arrow'); + const runList = document.getElementById('runs-' + expId); + + if (runList.classList.contains('open')) { + runList.classList.remove('open'); + arrow.classList.remove('open'); + return; + } + + arrow.classList.add('open'); + runList.classList.add('open'); + runList.innerHTML = '
Loading runs...
'; + + const uri = getTrackingUri(); + const params = uri ? '?tracking_uri=' + encodeURIComponent(uri) : ''; + + try { + const res = await fetch(API_BASE + '/api/experiments/' + expId + '/runs' + params); + const runs = await res.json(); + + if (runs.length === 0) { + runList.innerHTML = '
No runs
'; + return; + } + + runList.innerHTML = ''; + runs.forEach(function(run) { + const row = document.createElement('div'); + row.className = 'run-row'; + row.innerHTML = + '' + (run.run_name || run.run_id.substring(0, 8)) + '' + + '' + run.status + '' + + '' + formatTime(run.start_time) + '' + + '
' + + '' + + '' + + '' + + '
'; + runList.appendChild(row); + }); + } catch (e) { + runList.innerHTML = '
Failed to load runs
'; + } +} + +async function viewRun(runId) { + const uri = getTrackingUri(); + const params = uri ? '?tracking_uri=' + encodeURIComponent(uri) : ''; + try { + const res = await fetch(API_BASE + '/api/runs/' + runId + '/mlflow-link' + params); + const data = await res.json(); + window.open(data.url, '_blank'); + } catch (e) { + alert('Failed to get MLflow link'); + } +} + +function trainRun(runId) { + alert('Train is not implemented yet.'); +} + +function serveRun(runId) { + alert('Serve: model_uri required. Use Swagger UI (/docs) for now.'); +} + +document.addEventListener('DOMContentLoaded', function() { + loadExperiments(); +}); diff --git a/static/index.html b/static/index.html new file mode 100644 index 0000000..063fd21 --- /dev/null +++ b/static/index.html @@ -0,0 +1,24 @@ + + + + + + MLflow Dashboard + + + + +
+

MLflow Dashboard

+ + +
+ +
+
Tracking URI를 입력하고 Connect를 클릭하세요.
+
+ + + + + diff --git a/static/style.css b/static/style.css new file mode 100644 index 0000000..1855fa3 --- /dev/null +++ b/static/style.css @@ -0,0 +1,85 @@ +* { margin: 0; padding: 0; box-sizing: border-box; } +body { font-family: 'Segoe UI', sans-serif; background: #f5f5f5; color: #333; } + +.header { + background: #1a1a2e; color: white; padding: 16px 24px; + display: flex; align-items: center; gap: 16px; +} +.header h1 { font-size: 20px; font-weight: 600; } +.header input { + padding: 8px 12px; border: none; border-radius: 6px; + width: 360px; font-size: 14px; background: #16213e; color: white; +} +.header input::placeholder { color: #888; } +.header button { + padding: 8px 20px; border: none; border-radius: 6px; + background: #0f3460; color: white; cursor: pointer; font-size: 14px; +} +.header button:hover { background: #1a5276; } + +.container { max-width: 1200px; margin: 24px auto; padding: 0 16px; } + +.exp-card { + background: white; border-radius: 8px; margin-bottom: 8px; + box-shadow: 0 1px 3px rgba(0,0,0,0.1); overflow: hidden; +} +.exp-header { + padding: 14px 20px; cursor: pointer; display: flex; + align-items: center; gap: 12px; user-select: none; + transition: background 0.15s; +} +.exp-header:hover { background: #f8f9fa; } +.exp-arrow { + font-size: 12px; color: #666; transition: transform 0.2s; + width: 16px; text-align: center; +} +.exp-arrow.open { transform: rotate(90deg); } +.exp-name { font-weight: 600; font-size: 15px; flex: 1; } +.exp-badge { + background: #e8f0fe; color: #1967d2; padding: 2px 10px; + border-radius: 12px; font-size: 12px; font-weight: 500; +} + +.run-list { display: none; border-top: 1px solid #eee; } +.run-list.open { display: block; } + +.run-row { + padding: 10px 20px 10px 48px; display: flex; align-items: center; + gap: 12px; border-bottom: 1px solid #f0f0f0; font-size: 14px; +} +.run-row:last-child { border-bottom: none; } +.run-row:hover { background: #fafbfc; } + +.run-name { flex: 1; font-weight: 500; } + +.status { 
+ padding: 3px 10px; border-radius: 12px; font-size: 11px; + font-weight: 600; text-transform: uppercase; min-width: 80px; + text-align: center; +} +.status.FINISHED { background: #e6f4ea; color: #1e8e3e; } +.status.RUNNING { background: #e8f0fe; color: #1967d2; } +.status.FAILED { background: #fce8e6; color: #d93025; } +.status.KILLED { background: #fef7e0; color: #ea8600; } +.status.SCHEDULED { background: #f3e8fd; color: #8430ce; } + +.btn-group { display: flex; gap: 6px; } +.btn { + padding: 5px 14px; border: 1px solid #ddd; border-radius: 4px; + font-size: 12px; cursor: pointer; background: white; + transition: all 0.15s; +} +.btn:hover { background: #f0f0f0; } +.btn-view { border-color: #1967d2; color: #1967d2; } +.btn-view:hover { background: #e8f0fe; } +.btn-train { border-color: #1e8e3e; color: #1e8e3e; } +.btn-train:hover { background: #e6f4ea; } +.btn-serve { border-color: #ea8600; color: #ea8600; } +.btn-serve:hover { background: #fef7e0; } + +.loading { text-align: center; padding: 40px; color: #888; } +.error { text-align: center; padding: 40px; color: #d93025; } + +.run-time { + color: #888; font-size: 12px; min-width: 140px; text-align: right; +}