Initial commit: MLflow dashboard project
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
9
.claude/settings.local.json
Normal file
9
.claude/settings.local.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(git init:*)",
|
||||
"Bash(git add:*)",
|
||||
"Bash(git config --global --add safe.directory '%\\(prefix\\)///192.168.200.231/fermat/work/mlflow_docker/dashboard')"
|
||||
]
|
||||
}
|
||||
}
|
||||
13
.gitignore
vendored
Normal file
13
.gitignore
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.env
|
||||
.venv/
|
||||
venv/
|
||||
*.egg-info/
|
||||
dist/
|
||||
build/
|
||||
.idea/
|
||||
.vscode/
|
||||
*.log
|
||||
1
config.py
Normal file
1
config.py
Normal file
@@ -0,0 +1 @@
|
||||
import os

# Tracking server used when a request does not supply its own ``tracking_uri``.
# The standard MLFLOW_TRACKING_URI environment variable takes precedence so the
# dashboard can be pointed at another server without a code change; an unset or
# empty variable falls back to the original hard-coded address.
DEFAULT_TRACKING_URI = os.environ.get("MLFLOW_TRACKING_URI") or "http://192.168.200.231:50111"
|
||||
27
main.py
Normal file
27
main.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from fastapi import FastAPI
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse
|
||||
from routers import experiments, runs, serve, train
|
||||
|
||||
# Application object; the metadata below is what FastAPI is constructed with.
app = FastAPI(
    title="MLflow Dashboard API",
    description="MLflow Experiment/Run 조회 및 Train/Serve 관리",
    version="0.1.0",
)

# Every feature router is registered under the same "/api" prefix, each with
# its own tag, in the same order as before.
for _router_module, _tag in (
    (experiments, "Experiments"),
    (runs, "Runs"),
    (serve, "Serve"),
    (train, "Train"),
):
    app.include_router(_router_module.router, prefix="/api", tags=[_tag])

# Front-end assets (index.html, app.js, style.css) are served from ./static.
app.mount("/static", StaticFiles(directory="static"), name="static")
|
||||
|
||||
|
||||
@app.get("/")
def index():
    # Root URL: hand back the dashboard's HTML page from the static directory.
    landing_page = "static/index.html"
    return FileResponse(landing_page)
|
||||
|
||||
|
||||
@app.get("/health")
def health():
    # Liveness probe: always answers with a fixed "ok" payload.
    payload = {"status": "ok"}
    return payload
|
||||
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
fastapi>=0.115.0
|
||||
uvicorn>=0.34.0
|
||||
mlflow>=3.8.0
|
||||
0
routers/__init__.py
Normal file
0
routers/__init__.py
Normal file
21
routers/experiments.py
Normal file
21
routers/experiments.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from typing import List, Optional
|
||||
from fastapi import APIRouter, Query
|
||||
from schemas import ExperimentSummary, RunSummary
|
||||
from services import mlflow_service
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/experiments", response_model=List[ExperimentSummary])
def list_experiments(
    tracking_uri: Optional[str] = Query(None, description="MLflow Tracking URI"),
):
    # Delegates to the service layer; a missing tracking_uri (None) is passed
    # through unchanged and resolved there.
    summaries = mlflow_service.get_experiments(tracking_uri)
    return summaries
|
||||
|
||||
|
||||
@router.get("/experiments/{exp_id}/runs", response_model=List[RunSummary])
def list_runs(
    exp_id: str,
    tracking_uri: Optional[str] = Query(None, description="MLflow Tracking URI"),
):
    # exp_id comes from the URL path; tracking_uri is an optional query
    # parameter. Both are forwarded to the service layer as-is.
    run_summaries = mlflow_service.get_runs(tracking_uri, exp_id)
    return run_summaries
|
||||
23
routers/runs.py
Normal file
23
routers/runs.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from fastapi import APIRouter, Query
|
||||
from typing import Optional
|
||||
from schemas import RunDetail, MLflowLink
|
||||
from services import mlflow_service
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/runs/{run_id}", response_model=RunDetail)
def get_run(
    run_id: str,
    tracking_uri: Optional[str] = Query(None, description="MLflow Tracking URI"),
):
    # Fetches the run's info plus params/metrics/tags through the service layer.
    detail = mlflow_service.get_run_detail(tracking_uri, run_id)
    return detail
|
||||
|
||||
|
||||
@router.get("/runs/{run_id}/mlflow-link", response_model=MLflowLink)
def get_mlflow_link(
    run_id: str,
    tracking_uri: Optional[str] = Query(None, description="MLflow Tracking URI"),
):
    # Wraps the URL computed by the service layer in the {"url": ...} shape
    # declared by the MLflowLink response model.
    return {"url": mlflow_service.get_mlflow_link(tracking_uri, run_id)}
|
||||
73
routers/serve.py
Normal file
73
routers/serve.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import subprocess
|
||||
import uuid
|
||||
from typing import Dict, List
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from schemas import ServeRequest, ServeStatus
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# In-memory store for serving processes
|
||||
_serving_processes = {} # type: Dict[str, Dict]
|
||||
|
||||
|
||||
@router.post("/serve", response_model=ServeStatus)
def start_serve(req: ServeRequest):
    """Launch ``mlflow models serve`` for ``req.model_uri`` as a child process.

    The process handle is recorded in the in-memory ``_serving_processes``
    registry under a freshly generated 8-character id, and that id is returned
    so the caller can later list or stop the server.

    Raises:
        HTTPException: 500 when the ``mlflow`` executable cannot be found.
    """
    import os

    serve_id = uuid.uuid4().hex[:8]

    # Always use ``--env-manager local``. The previous command passed the
    # deprecated ``--no-conda`` flag and, when a tracking URI was supplied,
    # *also* ``--env-manager local`` -- two spellings of the same option.
    cmd = [
        "mlflow", "models", "serve",
        "-m", req.model_uri,
        "-p", str(req.port),
        "--env-manager", "local",
    ]

    # The tracking URI reaches the child through the standard MLflow
    # environment variable; it was previously accepted but never forwarded.
    env = os.environ.copy()
    if req.tracking_uri:
        env["MLFLOW_TRACKING_URI"] = req.tracking_uri

    try:
        # Output is discarded instead of piped: nothing ever reads the pipes,
        # so a chatty server would eventually block on a full pipe buffer.
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            env=env,
        )
    except FileNotFoundError:
        raise HTTPException(status_code=500, detail="mlflow CLI not found")

    _serving_processes[serve_id] = {
        "id": serve_id,
        "model_uri": req.model_uri,
        "port": req.port,
        "pid": proc.pid,
        "process": proc,
    }

    return ServeStatus(
        id=serve_id,
        model_uri=req.model_uri,
        port=req.port,
        pid=proc.pid,
        status="running",
    )
|
||||
|
||||
|
||||
@router.get("/serve", response_model=List[ServeStatus])
def list_serve():
    # One ServeStatus per registry entry. A process whose poll() still returns
    # None has not exited and is reported as "running"; anything else is
    # reported as "stopped".
    return [
        ServeStatus(
            id=serve_id,
            model_uri=entry["model_uri"],
            port=entry["port"],
            pid=entry["pid"],
            status="stopped" if entry["process"].poll() is not None else "running",
        )
        for serve_id, entry in _serving_processes.items()
    ]
|
||||
|
||||
|
||||
@router.delete("/serve/{serve_id}")
def stop_serve(serve_id: str):
    """Stop a serving process started by ``start_serve`` and forget it.

    Raises:
        HTTPException: 404 when ``serve_id`` is not in the registry.
    """
    # Single pop instead of "check membership, then index, then pop".
    entry = _serving_processes.pop(serve_id, None)
    if entry is None:
        raise HTTPException(status_code=404, detail="Serve process not found")

    proc = entry["process"]
    if proc.poll() is None:
        proc.terminate()
        # Reap the child so it does not linger as a zombie; escalate to kill()
        # if it ignores the termination request.
        try:
            proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()

    model_uri = entry["model_uri"]
    return {"message": f"Stopped serving {model_uri}"}
|
||||
11
routers/train.py
Normal file
11
routers/train.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/train")
def start_train():
    # Placeholder endpoint: always answers 501 Not Implemented.
    not_implemented = HTTPException(
        status_code=501,
        detail="Train trigger is not implemented yet. Will be connected to training server.",
    )
    raise not_implemented
|
||||
48
schemas.py
Normal file
48
schemas.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import Dict, Optional
|
||||
|
||||
|
||||
# One row of the experiment list returned by GET /api/experiments.
class ExperimentSummary(BaseModel):
    experiment_id: str
    name: str
    lifecycle_stage: str
    # Number of runs the service layer counted for this experiment.
    run_count: int
|
||||
|
||||
|
||||
# Compact description of a run, used for the per-experiment run list.
class RunSummary(BaseModel):
    run_id: str
    run_name: Optional[str] = None
    experiment_id: str
    status: str
    # Timestamps are passed through from MLflow's run info unchanged; the
    # front end feeds start_time straight into `new Date(...)`.
    start_time: Optional[int] = None
    end_time: Optional[int] = None
|
||||
|
||||
|
||||
# Full description of a single run: the RunSummary fields plus the run's
# logged params, metrics and tags.
class RunDetail(BaseModel):
    run_id: str
    run_name: Optional[str] = None
    experiment_id: str
    status: str
    start_time: Optional[int] = None
    end_time: Optional[int] = None
    # Each mapping defaults to empty when the run has nothing logged.
    params: Dict[str, str] = {}
    metrics: Dict[str, float] = {}
    tags: Dict[str, str] = {}
|
||||
|
||||
|
||||
# Response body of GET /api/runs/{run_id}/mlflow-link.
class MLflowLink(BaseModel):
    # URL of the run's page, built from the tracking URI by the service layer.
    url: str
|
||||
|
||||
|
||||
# Request body of POST /api/serve.
class ServeRequest(BaseModel):
    # Optional MLflow tracking URI supplied by the caller.
    tracking_uri: Optional[str] = None
    model_uri: str  # e.g. "models:/model_name/1"
    # Port handed to `mlflow models serve -p`.
    port: int = 5001
|
||||
|
||||
|
||||
# State of one serving process tracked by the serve router.
class ServeStatus(BaseModel):
    # Short id generated when the process was started.
    id: str
    model_uri: str
    port: int
    # OS process id of the spawned `mlflow models serve` command.
    pid: int
    status: str  # "running" or "stopped"
|
||||
0
services/__init__.py
Normal file
0
services/__init__.py
Normal file
80
services/mlflow_service.py
Normal file
80
services/mlflow_service.py
Normal file
@@ -0,0 +1,80 @@
|
||||
from typing import Dict, List, Optional
|
||||
from mlflow.tracking import MlflowClient
|
||||
from mlflow.entities import ViewType
|
||||
from config import DEFAULT_TRACKING_URI
|
||||
|
||||
|
||||
def _client(tracking_uri=None):
    # type: (Optional[str]) -> MlflowClient
    """Build an MlflowClient for ``tracking_uri``.

    A falsy ``tracking_uri`` (``None`` or an empty string) selects
    ``DEFAULT_TRACKING_URI`` instead.
    """
    uri = tracking_uri if tracking_uri else DEFAULT_TRACKING_URI
    return MlflowClient(tracking_uri=uri)
|
||||
|
||||
|
||||
def _tracking_uri(tracking_uri=None):
    # type: (Optional[str]) -> str
    """Return ``tracking_uri``, or ``DEFAULT_TRACKING_URI`` when it is falsy."""
    if tracking_uri:
        return tracking_uri
    return DEFAULT_TRACKING_URI
|
||||
|
||||
|
||||
def _count_runs(client, experiment_id):
    # type: (MlflowClient, str) -> int
    """Count the runs of one experiment by walking every result page."""
    total = 0
    page_token = None
    while True:
        page = client.search_runs(
            experiment_ids=[experiment_id],
            page_token=page_token,
        )
        total += len(page)
        # search_runs returns a paged list; a falsy token means no more pages.
        page_token = page.token
        if not page_token:
            return total


def get_experiments(tracking_uri=None):
    # type: (Optional[str]) -> List[Dict]
    """List the active experiments on the tracking server with their run counts.

    Args:
        tracking_uri: MLflow tracking URI; the configured default is used when
            it is ``None`` or empty.

    Returns:
        One dict per experiment with the keys ``experiment_id``, ``name``,
        ``lifecycle_stage`` and ``run_count``.
    """
    client = _client(tracking_uri)
    experiments = client.search_experiments(view_type=ViewType.ACTIVE_ONLY)
    # A single search_runs call only returns the first page of results, so
    # taking len() of it capped run_count at the page size. Counting across
    # pages reports the real total.
    return [
        {
            "experiment_id": exp.experiment_id,
            "name": exp.name,
            "lifecycle_stage": exp.lifecycle_stage,
            "run_count": _count_runs(client, exp.experiment_id),
        }
        for exp in experiments
    ]
|
||||
|
||||
|
||||
def get_runs(tracking_uri=None, experiment_id="0"):
    # type: (Optional[str], str) -> List[Dict]
    """Return summary dicts for the active runs of one experiment.

    Runs are requested ordered by start time, newest first. Each dict carries
    ``run_id``, ``run_name``, ``experiment_id``, ``status``, ``start_time``
    and ``end_time`` taken from the run's info.
    """
    found = _client(tracking_uri).search_runs(
        experiment_ids=[experiment_id],
        run_view_type=ViewType.ACTIVE_ONLY,
        order_by=["attributes.start_time DESC"],
    )

    summaries = []
    for run in found:
        info = run.info
        summaries.append({
            "run_id": info.run_id,
            "run_name": info.run_name,
            "experiment_id": info.experiment_id,
            "status": info.status,
            "start_time": info.start_time,
            "end_time": info.end_time,
        })
    return summaries
|
||||
|
||||
|
||||
def get_run_detail(tracking_uri=None, run_id=""):
    # type: (Optional[str], str) -> Dict
    """Return one run's info fields together with its params, metrics and tags.

    Missing or empty param/metric/tag collections are reported as empty dicts.
    """
    run = _client(tracking_uri).get_run(run_id)
    info = run.info
    data = run.data

    detail = {
        "run_id": info.run_id,
        "run_name": info.run_name,
        "experiment_id": info.experiment_id,
        "status": info.status,
        "start_time": info.start_time,
        "end_time": info.end_time,
    }
    # Copy each collection into a plain dict; a falsy value becomes {}.
    detail["params"] = dict(data.params or {})
    detail["metrics"] = dict(data.metrics or {})
    detail["tags"] = dict(data.tags or {})
    return detail
|
||||
|
||||
|
||||
def get_mlflow_link(tracking_uri=None, run_id=""):
    # type: (Optional[str], str) -> str
    """Build the URL of a run's page, rooted at the tracking URI.

    The run is looked up first so that the experiment id in the link comes
    from the tracking server rather than from the caller.
    """
    info = _client(tracking_uri).get_run(run_id).info
    # Strip trailing slashes so the join below never yields a double slash.
    root = _tracking_uri(tracking_uri).rstrip("/")
    template = "{base}/#/experiments/{exp}/runs/{run}"
    return template.format(base=root, exp=info.experiment_id, run=info.run_id)
|
||||
118
static/app.js
Normal file
118
static/app.js
Normal file
@@ -0,0 +1,118 @@
|
||||
const API_BASE = window.location.origin;
|
||||
|
||||
/**
 * Read the tracking URI typed into the header input.
 * @returns {string|null} The trimmed value, or null when the field is blank.
 */
function getTrackingUri() {
  const input = document.getElementById('trackingUri');
  const trimmed = input.value.trim();
  return trimmed === '' ? null : trimmed;
}
|
||||
|
||||
/**
 * Format a timestamp as "<date> <HH:MM>" using the ko-KR locale.
 * @param {number} ts Value accepted by the Date constructor.
 * @returns {string} The formatted text, or '' when ts is falsy.
 */
function formatTime(ts) {
  if (!ts) {
    return '';
  }
  const when = new Date(ts);
  const datePart = when.toLocaleDateString('ko-KR');
  const timePart = when.toLocaleTimeString('ko-KR', {hour: '2-digit', minute: '2-digit'});
  return datePart + ' ' + timePart;
}
|
||||
|
||||
/**
 * Fetch the experiment list and render one collapsible card per experiment.
 *
 * Server-provided values (experiment name, id, run count) and error text are
 * inserted with textContent and DOM listeners rather than concatenated into
 * HTML, so markup inside an experiment name cannot be interpreted by the page.
 */
async function loadExperiments() {
  const container = document.getElementById('content');
  container.innerHTML = '<div class="loading">Loading...</div>';

  const uri = getTrackingUri();
  const params = uri ? '?tracking_uri=' + encodeURIComponent(uri) : '';

  // Small element factory: tag + class + plain-text content.
  function el(tag, className, text) {
    const node = document.createElement(tag);
    node.className = className;
    if (text !== undefined) node.textContent = text;
    return node;
  }

  try {
    const res = await fetch(API_BASE + '/api/experiments' + params);
    if (!res.ok) throw new Error('Failed: ' + res.status);
    const experiments = await res.json();

    if (experiments.length === 0) {
      container.innerHTML = '<div class="loading">No experiments found.</div>';
      return;
    }

    container.innerHTML = '';
    experiments.forEach(function (exp) {
      const card = el('div', 'exp-card');

      const header = el('div', 'exp-header');
      // A listener replaces the inline onclick attribute, so the id is passed
      // as a value instead of being spliced into a script string.
      header.addEventListener('click', function () {
        toggleExp(header, exp.experiment_id);
      });
      header.appendChild(el('span', 'exp-arrow', '▶'));
      header.appendChild(el('span', 'exp-name', exp.name));
      header.appendChild(el('span', 'exp-badge', exp.run_count + ' runs'));

      // toggleExp looks this element up by its "runs-<id>" id.
      const runList = el('div', 'run-list');
      runList.id = 'runs-' + exp.experiment_id;
      runList.appendChild(el('div', 'loading', 'Loading runs...'));

      card.appendChild(header);
      card.appendChild(runList);
      container.appendChild(card);
    });
  } catch (e) {
    container.innerHTML = '';
    container.appendChild(el('div', 'error', 'Connection failed: ' + e.message));
  }
}
|
||||
|
||||
/**
 * Expand or collapse an experiment card; on expand, fetch and render its runs.
 *
 * @param {Element} header The clicked .exp-header element.
 * @param {string} expId Experiment id; also the suffix of the run list's id.
 *
 * Run names, statuses and ids are rendered with textContent and DOM listeners
 * so that server-provided text is never parsed as HTML or script.
 */
async function toggleExp(header, expId) {
  const arrow = header.querySelector('.exp-arrow');
  const runList = document.getElementById('runs-' + expId);

  // Already open: collapse and stop.
  if (runList.classList.contains('open')) {
    runList.classList.remove('open');
    arrow.classList.remove('open');
    return;
  }

  arrow.classList.add('open');
  runList.classList.add('open');
  runList.innerHTML = '<div class="loading">Loading runs...</div>';

  const uri = getTrackingUri();
  const params = uri ? '?tracking_uri=' + encodeURIComponent(uri) : '';

  // Small element factory: tag + class + plain-text content.
  function el(tag, className, text) {
    const node = document.createElement(tag);
    node.className = className;
    if (text !== undefined) node.textContent = text;
    return node;
  }

  try {
    const res = await fetch(
      API_BASE + '/api/experiments/' + encodeURIComponent(expId) + '/runs' + params
    );
    // Fail explicitly on an error response instead of relying on the error
    // body happening not to be an array.
    if (!res.ok) throw new Error('Failed: ' + res.status);
    const runs = await res.json();

    if (runs.length === 0) {
      runList.innerHTML = '<div class="run-row" style="color:#888;">No runs</div>';
      return;
    }

    runList.innerHTML = '';
    runs.forEach(function (run) {
      const row = el('div', 'run-row');
      row.appendChild(el('span', 'run-name', run.run_name || run.run_id.substring(0, 8)));
      row.appendChild(el('span', 'status ' + run.status, run.status));
      row.appendChild(el('span', 'run-time', formatTime(run.start_time)));

      const buttons = el('div', 'btn-group');
      [
        ['btn btn-view', 'View', viewRun],
        ['btn btn-train', 'Train', trainRun],
        ['btn btn-serve', 'Serve', serveRun],
      ].forEach(function (spec) {
        const button = el('button', spec[0], spec[1]);
        button.addEventListener('click', function () {
          spec[2](run.run_id);
        });
        buttons.appendChild(button);
      });
      row.appendChild(buttons);

      runList.appendChild(row);
    });
  } catch (e) {
    runList.innerHTML = '<div class="error">Failed to load runs</div>';
  }
}
|
||||
|
||||
/**
 * Open the MLflow page of a run in a new tab.
 * @param {string} runId Id of the run whose link is requested from the API.
 */
async function viewRun(runId) {
  const uri = getTrackingUri();
  const params = uri ? '?tracking_uri=' + encodeURIComponent(uri) : '';
  try {
    const res = await fetch(
      API_BASE + '/api/runs/' + encodeURIComponent(runId) + '/mlflow-link' + params
    );
    // Without this check an error response (which has no `url` field) would
    // open a new tab pointing at "undefined".
    if (!res.ok) throw new Error('Failed: ' + res.status);
    const data = await res.json();
    window.open(data.url, '_blank');
  } catch (e) {
    alert('Failed to get MLflow link');
  }
}
|
||||
|
||||
/**
 * Placeholder for the Train button: only informs the user.
 * @param {string} runId Unused for now.
 */
function trainRun(runId) {
  const notice = 'Train is not implemented yet.';
  alert(notice);
}
|
||||
|
||||
/**
 * Placeholder for the Serve button: points the user at the Swagger UI.
 * @param {string} runId Unused for now.
 */
function serveRun(runId) {
  const notice = 'Serve: model_uri required. Use Swagger UI (/docs) for now.';
  alert(notice);
}
|
||||
|
||||
// Load the experiment list as soon as the document has been parsed.
document.addEventListener('DOMContentLoaded', () => {
  loadExperiments();
});
|
||||
24
static/index.html
Normal file
24
static/index.html
Normal file
@@ -0,0 +1,24 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="ko">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>MLflow Dashboard</title>
|
||||
<link rel="stylesheet" href="/static/style.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>MLflow Dashboard</h1>
|
||||
<input type="text" id="trackingUri" placeholder="Tracking URI (e.g. http://192.168.200.231:50111)">
|
||||
<button onclick="loadExperiments()">Connect</button>
|
||||
</div>
|
||||
|
||||
<div class="container" id="content">
|
||||
<div class="loading">Tracking URI를 입력하고 Connect를 클릭하세요.</div>
|
||||
</div>
|
||||
|
||||
<script src="/static/app.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
85
static/style.css
Normal file
85
static/style.css
Normal file
@@ -0,0 +1,85 @@
|
||||
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||||
body { font-family: 'Segoe UI', sans-serif; background: #f5f5f5; color: #333; }
|
||||
|
||||
.header {
|
||||
background: #1a1a2e; color: white; padding: 16px 24px;
|
||||
display: flex; align-items: center; gap: 16px;
|
||||
}
|
||||
.header h1 { font-size: 20px; font-weight: 600; }
|
||||
.header input {
|
||||
padding: 8px 12px; border: none; border-radius: 6px;
|
||||
width: 360px; font-size: 14px; background: #16213e; color: white;
|
||||
}
|
||||
.header input::placeholder { color: #888; }
|
||||
.header button {
|
||||
padding: 8px 20px; border: none; border-radius: 6px;
|
||||
background: #0f3460; color: white; cursor: pointer; font-size: 14px;
|
||||
}
|
||||
.header button:hover { background: #1a5276; }
|
||||
|
||||
.container { max-width: 1200px; margin: 24px auto; padding: 0 16px; }
|
||||
|
||||
.exp-card {
|
||||
background: white; border-radius: 8px; margin-bottom: 8px;
|
||||
box-shadow: 0 1px 3px rgba(0,0,0,0.1); overflow: hidden;
|
||||
}
|
||||
.exp-header {
|
||||
padding: 14px 20px; cursor: pointer; display: flex;
|
||||
align-items: center; gap: 12px; user-select: none;
|
||||
transition: background 0.15s;
|
||||
}
|
||||
.exp-header:hover { background: #f8f9fa; }
|
||||
.exp-arrow {
|
||||
font-size: 12px; color: #666; transition: transform 0.2s;
|
||||
width: 16px; text-align: center;
|
||||
}
|
||||
.exp-arrow.open { transform: rotate(90deg); }
|
||||
.exp-name { font-weight: 600; font-size: 15px; flex: 1; }
|
||||
.exp-badge {
|
||||
background: #e8f0fe; color: #1967d2; padding: 2px 10px;
|
||||
border-radius: 12px; font-size: 12px; font-weight: 500;
|
||||
}
|
||||
|
||||
.run-list { display: none; border-top: 1px solid #eee; }
|
||||
.run-list.open { display: block; }
|
||||
|
||||
.run-row {
|
||||
padding: 10px 20px 10px 48px; display: flex; align-items: center;
|
||||
gap: 12px; border-bottom: 1px solid #f0f0f0; font-size: 14px;
|
||||
}
|
||||
.run-row:last-child { border-bottom: none; }
|
||||
.run-row:hover { background: #fafbfc; }
|
||||
|
||||
.run-name { flex: 1; font-weight: 500; }
|
||||
|
||||
.status {
|
||||
padding: 3px 10px; border-radius: 12px; font-size: 11px;
|
||||
font-weight: 600; text-transform: uppercase; min-width: 80px;
|
||||
text-align: center;
|
||||
}
|
||||
.status.FINISHED { background: #e6f4ea; color: #1e8e3e; }
|
||||
.status.RUNNING { background: #e8f0fe; color: #1967d2; }
|
||||
.status.FAILED { background: #fce8e6; color: #d93025; }
|
||||
.status.KILLED { background: #fef7e0; color: #ea8600; }
|
||||
.status.SCHEDULED { background: #f3e8fd; color: #8430ce; }
|
||||
|
||||
.btn-group { display: flex; gap: 6px; }
|
||||
.btn {
|
||||
padding: 5px 14px; border: 1px solid #ddd; border-radius: 4px;
|
||||
font-size: 12px; cursor: pointer; background: white;
|
||||
transition: all 0.15s;
|
||||
}
|
||||
.btn:hover { background: #f0f0f0; }
|
||||
.btn-view { border-color: #1967d2; color: #1967d2; }
|
||||
.btn-view:hover { background: #e8f0fe; }
|
||||
.btn-train { border-color: #1e8e3e; color: #1e8e3e; }
|
||||
.btn-train:hover { background: #e6f4ea; }
|
||||
.btn-serve { border-color: #ea8600; color: #ea8600; }
|
||||
.btn-serve:hover { background: #fef7e0; }
|
||||
|
||||
.loading { text-align: center; padding: 40px; color: #888; }
|
||||
.error { text-align: center; padding: 40px; color: #d93025; }
|
||||
|
||||
.run-time {
|
||||
color: #888; font-size: 12px; min-width: 140px; text-align: right;
|
||||
}
|
||||
Reference in New Issue
Block a user