快速发现和定位线上问题的完整解决方案
1. 建立完善的监控体系
前端监控系统
错误监控
javascript
// Global error capture: every `error` event that reaches `window` is turned
// into a plain report object and handed to the monitoring backend.
window.addEventListener('error', function reportUncaughtError(event) {
  const { message, filename, lineno, colno, error } = event;

  sendErrorToMonitoring({
    message,
    filename,
    lineno,
    colno,
    // `event.error` may be absent, hence the optional chaining.
    stack: error?.stack,
    timestamp: Date.now(),
    userAgent: navigator.userAgent,
    url: window.location.href,
  });
});
// Promise rejection capture: reports rejections that no handler consumed.
window.addEventListener('unhandledrejection', (event) => {
  const { reason } = event;

  const errorInfo = {
    type: 'unhandledrejection',
    reason,
    // An Error's `message` and `stack` are not enumerable, so an Error
    // `reason` becomes `{}` once JSON-serialized. Copy them out explicitly,
    // mirroring the fields reported by the global `error` handler.
    message: typeof reason === 'string' ? reason : reason?.message,
    stack: reason?.stack,
    timestamp: Date.now(),
    userAgent: navigator.userAgent,
    url: window.location.href,
  };

  sendErrorToMonitoring(errorInfo);
});
// Resource load failures. The listener is registered in the capture phase
// (third argument `true`) and ignores events whose target is `window` itself.
window.addEventListener(
  'error',
  ({ target }) => {
    if (target === window) {
      return;
    }

    sendErrorToMonitoring({
      type: 'resource',
      tagName: target.tagName,
      // Whichever URL attribute the element carries: `src` first, then `href`.
      src: target.src || target.href,
      timestamp: Date.now(),
    });
  },
  true,
);
性能监控
javascript
// Page performance monitoring
/**
 * Derives page-load durations from the legacy `performance.timing` object and
 * forwards them through `sendPerformanceData`.
 *
 * Does nothing when `window.performance` or `performance.timing` is missing.
 * A duration is reported as `null` when either of its timestamps is still 0,
 * i.e. the milestone has not happened yet (for example when this runs before
 * the load event has finished). Previously that case produced large negative
 * numbers.
 *
 * NOTE(review): `whiteScreenTime` is computed exactly like `ttfb`, and
 * `firstScreenTime` exactly like `loadTime`; kept as-is for payload
 * compatibility.
 *
 * @returns {void}
 */
const performanceMonitor = () => {
  if (!('performance' in window)) return;

  const timing = performance.timing;
  if (!timing) return;

  // Difference between two milestones, or null if either is unset (0).
  const elapsed = (start, end) => (start > 0 && end > 0 ? end - start : null);
  const sinceNavigationStart = (end) => elapsed(timing.navigationStart, end);

  const metrics = {
    // Total page load time
    loadTime: sinceNavigationStart(timing.loadEventEnd),
    // DNS lookup time
    dnsTime: elapsed(timing.domainLookupStart, timing.domainLookupEnd),
    // TCP connection time
    tcpTime: elapsed(timing.connectStart, timing.connectEnd),
    // Time to first byte
    ttfb: sinceNavigationStart(timing.responseStart),
    // DOM parsing time
    domParseTime: elapsed(timing.domLoading, timing.domContentLoadedEventEnd),
    // White-screen time
    whiteScreenTime: sinceNavigationStart(timing.responseStart),
    // First-screen time
    firstScreenTime: sinceNavigationStart(timing.loadEventEnd),
  };

  sendPerformanceData(metrics);
};
// Additional metrics collected through PerformanceObserver
if ('PerformanceObserver' in window) {
  // Subscribes to one entry type. `buffered: true` asks the browser to replay
  // entries recorded before this script ran, so an early LCP candidate or
  // first input is not missed. Engines that predate the `type`/`buffered`
  // options can throw from observe(); that failure is contained here so the
  // rest of the monitoring setup keeps running.
  const observeEntries = (type, onEntries) => {
    try {
      const observer = new PerformanceObserver((list) => {
        onEntries(list.getEntries());
      });
      observer.observe({ type, buffered: true });
    } catch (error) {
      console.warn(`PerformanceObserver cannot observe "${type}"`, error);
    }
  };

  // LCP (Largest Contentful Paint): the last entry of each batch is reported.
  observeEntries('largest-contentful-paint', (entries) => {
    const lastEntry = entries[entries.length - 1];
    if (!lastEntry) return;
    console.log('LCP:', lastEntry.startTime);
    sendMetric('lcp', lastEntry.startTime);
  });

  // FID (First Input Delay): gap between the input and the start of its
  // event processing.
  observeEntries('first-input', (entries) => {
    entries.forEach((entry) => {
      const delay = entry.processingStart - entry.startTime;
      console.log('FID:', delay);
      sendMetric('fid', delay);
    });
  });
}
用户行为监控
javascript
// User behavior tracking
/**
 * Helpers that describe a user interaction as a plain object and pass it to
 * `sendBehaviorData(eventName, payload)`.
 */
const userBehaviorTracker = {
  /** Records a page view with URL, title, referrer and user/session ids. */
  trackPageView() {
    sendBehaviorData('pageview', {
      url: window.location.href,
      title: document.title,
      referrer: document.referrer,
      timestamp: Date.now(),
      userId: getCurrentUserId(),
      sessionId: getSessionId(),
    });
  },

  /**
   * Records a click on `element`.
   * @param {Element} element - The clicked element.
   */
  trackClick(element) {
    const { tagName, className, id, textContent } = element;

    sendBehaviorData('click', {
      tagName,
      className,
      id,
      // Only the first 100 characters of the element text are kept.
      text: textContent?.slice(0, 100),
      xpath: getXPath(element),
      timestamp: Date.now(),
    });
  },

  /**
   * Records a form submission, listing each control of `form.elements`.
   * @param {HTMLFormElement} form - The submitted form.
   */
  trackFormSubmit(form) {
    // Password values are masked; other values are cut to 50 characters.
    const describeField = (field) => {
      const { name, type } = field;
      const value = type === 'password' ? '[HIDDEN]' : field.value?.slice(0, 50);
      return { name, type, value };
    };

    sendBehaviorData('form_submit', {
      action: form.action,
      method: form.method,
      fields: Array.from(form.elements, (field) => describeField(field)),
      timestamp: Date.now(),
    });
  },
};
// Automatic wiring: document-level listeners feed every click and form
// submission into the tracker, using the event target as the subject.
document.addEventListener('click', ({ target }) => {
  userBehaviorTracker.trackClick(target);
});

document.addEventListener('submit', ({ target }) => {
  userBehaviorTracker.trackFormSubmit(target);
});
2. 日志系统设计
结构化日志
javascript
// Log level definitions.
// Lower numbers are more severe. The key order must match the numeric values,
// because Logger resolves a level's name with `Object.keys(LogLevel)[level]`.
// Frozen so that shared configuration cannot be altered at runtime.
const LogLevel = Object.freeze({
  ERROR: 0,
  WARN: 1,
  INFO: 2,
  DEBUG: 3,
});
// Logger
/**
 * Structured logger that builds one entry per call and hands it to every
 * configured transport.
 */
class Logger {
  /**
   * @param {object} [options]
   * @param {number} [options.level=LogLevel.INFO] - Most verbose level that is
   *   still emitted; entries with a numerically greater level are dropped.
   * @param {object} [options.context={}] - Fields merged into every entry's `meta`.
   * @param {Array<{log: Function}>} [options.transports] - Sinks receiving each
   *   entry; defaults to a single ConsoleTransport.
   */
  constructor(options = {}) {
    // `??` instead of `||`: LogLevel.ERROR is 0, which `||` would treat as
    // "not provided" and silently replace with INFO.
    this.level = options.level ?? LogLevel.INFO;
    this.context = options.context || {};
    this.transports = options.transports || [new ConsoleTransport()];
  }

  // Log at ERROR level.
  error = (message, meta = {}) => {
    this.log(LogLevel.ERROR, message, meta);
  };

  // Log at WARN level.
  warn = (message, meta = {}) => {
    this.log(LogLevel.WARN, message, meta);
  };

  // Log at INFO level.
  info = (message, meta = {}) => {
    this.log(LogLevel.INFO, message, meta);
  };

  // Log at DEBUG level.
  debug = (message, meta = {}) => {
    this.log(LogLevel.DEBUG, message, meta);
  };

  /**
   * Core logging routine: filters by level, builds the entry and dispatches it.
   * @param {number} level - One of the LogLevel values.
   * @param {string} message
   * @param {object} [meta] - Per-call fields; they override `context` keys.
   */
  log = (level, message, meta) => {
    if (level > this.level) return;

    const logEntry = {
      timestamp: new Date().toISOString(),
      // Relies on LogLevel's key order matching its numeric values.
      level: Object.keys(LogLevel)[level],
      message,
      meta: { ...this.context, ...meta },
      url: window.location.href,
      userAgent: navigator.userAgent,
      userId: getCurrentUserId(),
      sessionId: getSessionId(),
      traceId: generateTraceId(),
    };

    this.transports.forEach((transport) => {
      transport.log(logEntry);
    });
  };
}
// Console transport
/**
 * Transport that prints each entry with the console method named after the
 * entry's level in lower case (for example `ERROR` -> `console.error`).
 */
class ConsoleTransport {
  /**
   * @param {{level: string, timestamp: string, message: string, meta: object}} entry
   */
  log = ({ level, timestamp, message, meta }) => {
    const consoleMethod = level.toLowerCase();
    const line = `[${timestamp}] ${message}`;
    console[consoleMethod](line, meta);
  };
}
// Remote transport
/**
 * Transport that buffers entries and POSTs them to `endpoint` as
 * `{ logs: [...] }`, either when `batchSize` entries have accumulated or on a
 * fixed timer.
 */
class RemoteTransport {
  /**
   * @param {string} endpoint - URL that receives the batched logs.
   */
  constructor(endpoint) {
    this.endpoint = endpoint;
    this.buffer = [];
    this.batchSize = 10;
    this.flushInterval = 5000;
    // Keep the handle so the periodic flush can be cancelled with stop().
    this.timer = setInterval(this.flush, this.flushInterval);
  }

  /**
   * Queues one entry and flushes immediately once the batch is full.
   * @param {object} entry
   */
  log = (entry) => {
    this.buffer.push(entry);
    if (this.buffer.length >= this.batchSize) {
      this.flush();
    }
  };

  /**
   * Sends everything currently buffered. On failure the batch is put back at
   * the front of the buffer so a later flush retries it.
   */
  flush = () => {
    if (this.buffer.length === 0) return;

    const logs = this.buffer;
    this.buffer = [];

    fetch(this.endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ logs }),
    })
      .then((response) => {
        // fetch() resolves for HTTP error statuses too; treat them as a
        // failure so the batch is re-queued instead of being lost.
        if (!response.ok) {
          throw new Error(`Log endpoint responded with HTTP ${response.status}`);
        }
      })
      .catch((error) => {
        console.error('Failed to send logs:', error);
        // Re-queue ahead of anything logged in the meantime.
        this.buffer = [...logs, ...this.buffer];
      });
  };

  /** Cancels the periodic flush. Buffered entries stay in `buffer`. */
  stop = () => {
    clearInterval(this.timer);
    this.timer = null;
  };
}
3. 实时告警系统
告警规则配置
javascript
// Alert rule definitions.
// Every rule fires when its metric is greater than `threshold`, evaluated
// over the same five-minute window (expressed in milliseconds).
const alertRules = (() => {
  const FIVE_MINUTES_MS = 5 * 60 * 1000;

  const exceeds = (threshold) => ({
    threshold,
    window: FIVE_MINUTES_MS,
    condition: 'greater_than',
  });

  return {
    // Error rate: 5%
    errorRate: exceeds(0.05),
    // Response time: 3 seconds
    responseTime: exceeds(3000),
    // Page load time: 5 seconds
    pageLoadTime: exceeds(5000),
  };
})();
// Alert checker
/**
 * Collects metric samples and periodically evaluates them against the
 * module-level `alertRules`, raising an alert when a rule is violated.
 *
 * The original class called `cleanupOldMetrics`, `isAlertSuppressed`,
 * `getSeverity` and `sendNotification` without defining them, so `addMetric`
 * threw on first use. Minimal implementations are provided here; each is an
 * instance property and can be replaced to customise behaviour.
 */
class AlertChecker {
  /**
   * @param {object} [options]
   * @param {number} [options.checkIntervalMs=60000] - How often rules are evaluated.
   * @param {number} [options.cooldownMs=300000] - Minimum gap between two
   *   alerts for the same rule.
   * @param {number} [options.retentionMs=300000] - How long samples are kept
   *   for metric types that have no rule of their own.
   */
  constructor({ checkIntervalMs = 60000, cooldownMs = 300000, retentionMs = 300000 } = {}) {
    this.metrics = new Map();
    this.alertHistory = new Map();
    this.cooldownMs = cooldownMs;
    this.retentionMs = retentionMs;
    // Keep the handle so the periodic check can be cancelled with stop().
    this.timer = setInterval(this.checkAlerts, checkIntervalMs);
  }

  /** Cancels the periodic rule evaluation. */
  stop = () => {
    clearInterval(this.timer);
    this.timer = null;
  };

  /**
   * Adds one metric sample.
   * @param {string} type - Metric name; matched against `alertRules` keys.
   * @param {number} value - Sample value. For `errorRate`, a value of exactly
   *   1 counts as an error and every sample counts as one request.
   * @param {number} [timestamp=Date.now()] - Sample time in epoch milliseconds.
   */
  addMetric = (type, value, timestamp = Date.now()) => {
    if (!this.metrics.has(type)) {
      this.metrics.set(type, []);
    }
    this.metrics.get(type).push({ value, timestamp });
    // Drop expired samples so the per-type array does not grow forever.
    this.cleanupOldMetrics(type);
  };

  /**
   * Removes samples older than the rule's window (or `retentionMs` when the
   * metric type has no rule).
   * @param {string} type
   * @param {number} [now=Date.now()]
   */
  cleanupOldMetrics = (type, now = Date.now()) => {
    const samples = this.metrics.get(type);
    if (!samples) return;

    const retention = alertRules[type]?.window ?? this.retentionMs;
    const cutoff = now - retention;
    this.metrics.set(
      type,
      samples.filter((sample) => sample.timestamp >= cutoff),
    );
  };

  /** Evaluates every rule and triggers the ones that fire and are not suppressed. */
  checkAlerts = () => {
    Object.entries(alertRules).forEach(([ruleType, rule]) => {
      const shouldAlert = this.evaluateRule(ruleType, rule);
      if (shouldAlert && !this.isAlertSuppressed(ruleType)) {
        this.triggerAlert(ruleType, rule);
      }
    });
  };

  /**
   * Tells whether an alert for `ruleType` was already raised within the
   * cooldown period.
   * @param {string} ruleType
   * @param {number} [now=Date.now()]
   * @returns {boolean}
   */
  isAlertSuppressed = (ruleType, now = Date.now()) => {
    const lastAlertAt = this.alertHistory.get(ruleType);
    return lastAlertAt !== undefined && now - lastAlertAt < this.cooldownMs;
  };

  /**
   * Evaluates one rule over the samples inside its time window.
   * @param {string} ruleType
   * @param {{threshold: number, window: number, condition: string}} rule
   * @returns {boolean} false when there are no recent samples.
   */
  evaluateRule = (ruleType, rule) => {
    const metrics = this.metrics.get(ruleType) || [];
    const windowStart = Date.now() - rule.window;
    const recentMetrics = metrics.filter((m) => m.timestamp >= windowStart);
    if (recentMetrics.length === 0) return false;

    let value;
    if (ruleType === 'errorRate') {
      // Share of samples flagged as errors (value === 1).
      const errors = recentMetrics.filter((m) => m.value === 1).length;
      value = errors / recentMetrics.length;
    } else {
      // Arithmetic mean of the recent samples.
      value = recentMetrics.reduce((sum, m) => sum + m.value, 0) / recentMetrics.length;
    }

    // Any condition other than 'greater_than' is evaluated as "less than".
    return rule.condition === 'greater_than' ? value > rule.threshold : value < rule.threshold;
  };

  /**
   * Severity attached to alerts of `ruleType`: the rule's own `severity`
   * field when it has one, otherwise 'warning'.
   * @param {string} ruleType
   * @returns {string}
   */
  getSeverity = (ruleType) => alertRules[ruleType]?.severity ?? 'warning';

  /** Builds the alert, sends it and records when it was raised. */
  triggerAlert = (ruleType, rule) => {
    const alert = {
      type: ruleType,
      threshold: rule.threshold,
      timestamp: Date.now(),
      severity: this.getSeverity(ruleType),
      message: `${ruleType} 超过阈值 ${rule.threshold}`,
    };

    this.sendAlert(alert);
    // Recorded for the cooldown check in isAlertSuppressed().
    this.alertHistory.set(ruleType, Date.now());
  };

  /** Posts the alert to the alerting endpoint and notifies other channels. */
  sendAlert = (alert) => {
    fetch('/api/alerts', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(alert),
    }).catch((error) => {
      // A failed delivery must not surface as an unhandled rejection.
      console.error('Failed to send alert:', error);
    });

    // Email / SMS / DingTalk and similar channels.
    this.sendNotification(alert);
  };

  /**
   * Default notification hook: only writes the alert to the console. Replace
   * this property to deliver through email, SMS, chat, etc.
   * @param {{severity: string, message: string}} alert
   */
  sendNotification = (alert) => {
    console.warn(`[ALERT][${alert.severity}] ${alert.message}`);
  };
}
4. 调试工具集成
远程调试工具
javascript
// Remote debugging console
/**
 * Connects to a WebSocket endpoint, executes the JSON commands it receives
 * and replies with `{ id, type: 'result' | 'error', ... }` messages.
 *
 * SECURITY: the `eval` command executes arbitrary code received over the
 * socket. It is kept for compatibility, but this class must only be used on
 * a trusted, authenticated debug channel and never enabled for ordinary
 * production users.
 *
 * NOTE(review): `getConsoleHistory` and `getNetworkRequests` are called below
 * but are not defined in this class; until they are supplied, those commands
 * are answered with an error message.
 */
class RemoteDebugger {
  /**
   * @param {string} wsUrl - WebSocket URL of the debugging server.
   */
  constructor(wsUrl) {
    this.ws = new WebSocket(wsUrl);
    // NOTE(review): the original never filled this queue; here it holds
    // outbound payloads produced while the socket is not open. Confirm intent.
    this.commandQueue = [];

    this.ws.onopen = () => {
      console.log('Remote debugger connected');
      this.flushCommandQueue();
    };

    this.ws.onmessage = (event) => {
      let command;
      try {
        command = JSON.parse(event.data);
      } catch (error) {
        // A malformed frame must not throw out of the socket handler.
        console.error('Remote debugger received a malformed message:', error);
        return;
      }
      this.executeCommand(command);
    };
  }

  /**
   * Executes one remote command and replies with its result or error.
   * @param {{id: *, type: string, code?: string, selector?: string}} command
   */
  executeCommand = (command) => {
    // `command` may be null or a primitive if the peer sent e.g. "null".
    const commandId = command?.id;
    try {
      let result;
      switch (command?.type) {
        case 'eval':
          // SECURITY: arbitrary code execution on remote input; see class doc.
          result = eval(command.code);
          break;
        case 'getElement':
          result = document.querySelector(command.selector);
          break;
        case 'getConsoleHistory':
          result = this.getConsoleHistory();
          break;
        case 'getNetworkRequests':
          result = this.getNetworkRequests();
          break;
        default:
          result = 'Unknown command';
      }
      this.sendResult(commandId, result);
    } catch (error) {
      this.sendError(commandId, error.message);
    }
  };

  /**
   * Sends a message now if the socket is open, otherwise queues it until
   * the connection opens.
   * @param {object} message
   */
  send = (message) => {
    const payload = JSON.stringify(message);
    if (this.ws.readyState === WebSocket.OPEN) {
      this.ws.send(payload);
    } else {
      this.commandQueue.push(payload);
    }
  };

  /** Sends every queued payload; called when the socket opens. */
  flushCommandQueue = () => {
    const pending = this.commandQueue;
    this.commandQueue = [];
    pending.forEach((payload) => this.ws.send(payload));
  };

  /**
   * Replies with a successful result. `data` is itself a JSON string (see
   * serializeResult), so the receiver has to parse it separately.
   */
  sendResult = (commandId, result) => {
    this.send({
      id: commandId,
      type: 'result',
      data: this.serializeResult(result),
    });
  };

  /**
   * Replies with a failure.
   * @param {*} commandId
   * @param {string} message - Human-readable error description.
   */
  sendError = (commandId, message) => {
    this.send({
      id: commandId,
      type: 'error',
      error: message,
    });
  };

  /**
   * Serializes a result to a string. DOM elements are reduced to a small
   * summary; anything JSON cannot represent falls back to `String(obj)`.
   * @param {*} obj
   * @returns {string}
   */
  serializeResult = (obj) => {
    try {
      const json = JSON.stringify(obj, (key, value) => {
        if (value instanceof Element) {
          return {
            tagName: value.tagName,
            className: value.className,
            id: value.id,
            innerHTML: value.innerHTML.slice(0, 200),
          };
        }
        return value;
      });
      // JSON.stringify yields undefined for undefined, functions and
      // symbols, which would silently drop `data` from the reply.
      return json === undefined ? String(obj) : json;
    } catch (error) {
      // Circular structures, BigInt values, etc.
      return String(obj);
    }
  };
}
5. 性能分析工具
性能瓶颈检测
javascript
// Performance analyzer
/**
 * Records named marks and measures, watches long tasks and resource loads
 * through PerformanceObserver, and reports slow items.
 *
 * The original class called `getResourceType`, `reportLongTask`,
 * `reportSlowOperation` and `reportSlowResource` without defining them.
 * Default implementations are provided here; the `report*` defaults only
 * write to the console and are instance properties that can be replaced.
 */
class PerformanceAnalyzer {
  // File extension -> coarse resource category used by getResourceType().
  static resourceTypesByExtension = new Map([
    ['js', 'script'],
    ['mjs', 'script'],
    ['css', 'stylesheet'],
    ['png', 'image'],
    ['jpg', 'image'],
    ['jpeg', 'image'],
    ['gif', 'image'],
    ['svg', 'image'],
    ['webp', 'image'],
    ['ico', 'image'],
    ['woff', 'font'],
    ['woff2', 'font'],
    ['ttf', 'font'],
    ['otf', 'font'],
    ['mp4', 'media'],
    ['webm', 'media'],
    ['mp3', 'media'],
    ['json', 'data'],
    ['html', 'document'],
  ]);

  constructor() {
    this.marks = new Map();
    this.measures = [];
    this.observers = [];
    this.initObservers();
  }

  /** Starts the long-task and resource observers where supported. */
  initObservers = () => {
    if (!('PerformanceObserver' in window)) return;

    // Registers one observer and remembers it so disconnect() can stop it.
    // Some engines throw when none of the requested entry types is
    // supported; that is contained per observer.
    const watch = (entryType, onEntry) => {
      try {
        const observer = new PerformanceObserver((list) => {
          list.getEntries().forEach(onEntry);
        });
        observer.observe({ entryTypes: [entryType] });
        this.observers.push(observer);
      } catch (error) {
        console.warn(`PerformanceObserver cannot observe "${entryType}"`, error);
      }
    };

    // Long tasks
    watch('longtask', (entry) => {
      if (entry.duration > 50) {
        this.reportLongTask(entry);
      }
    });

    // Resource loads
    watch('resource', (entry) => {
      this.analyzeResource(entry);
    });
  };

  /** Stops every observer started by initObservers(). */
  disconnect = () => {
    this.observers.forEach((observer) => observer.disconnect());
    this.observers = [];
  };

  /**
   * Records a named point in time, both locally and on the performance timeline.
   * @param {string} name
   * @returns {number} The `performance.now()` timestamp stored for the mark.
   */
  mark = (name) => {
    const timestamp = performance.now();
    this.marks.set(name, timestamp);
    performance.mark(name);
    return timestamp;
  };

  /**
   * Measures the time between two previously recorded marks.
   * @param {string} name - Name of the measure.
   * @param {string} startMark
   * @param {string} endMark
   * @returns {number|undefined} Duration in milliseconds, or undefined when
   *   either mark was never recorded through mark().
   */
  measure = (name, startMark, endMark) => {
    const startTime = this.marks.get(startMark);
    const endTime = this.marks.get(endMark);
    // Explicit undefined checks: a timestamp of 0 is valid, so a truthiness
    // test would wrongly reject it.
    if (startTime === undefined || endTime === undefined) return undefined;

    const duration = endTime - startTime;
    this.measures.push({ name, duration, startTime, endTime });
    performance.measure(name, startMark, endMark);

    // Operations longer than one second are reported.
    if (duration > 1000) {
      this.reportSlowOperation(name, duration);
    }
    return duration;
  };

  /**
   * Classifies a resource URL by file extension.
   * @param {string} url
   * @returns {string} A category such as 'script' or 'image', or 'other'.
   */
  getResourceType = (url) => {
    const path = String(url).split(/[?#]/)[0];
    const extension = path.slice(path.lastIndexOf('.') + 1).toLowerCase();
    return PerformanceAnalyzer.resourceTypesByExtension.get(extension) ?? 'other';
  };

  /**
   * Summarises one resource timing entry and reports it when it took longer
   * than two seconds.
   * @param {{name: string, duration: number, transferSize: number}} entry
   */
  analyzeResource = (entry) => {
    const analysis = {
      name: entry.name,
      duration: entry.duration,
      size: entry.transferSize,
      type: this.getResourceType(entry.name),
      // NOTE(review): a transferSize of 0 is treated as a cache hit; verify
      // this also holds for cross-origin resources.
      cached: entry.transferSize === 0,
      slow: entry.duration > 2000,
    };

    if (analysis.slow) {
      this.reportSlowResource(analysis);
    }
  };

  /** Default long-task reporter; replace to forward to a backend. */
  reportLongTask = (entry) => {
    console.warn('Long task detected:', {
      name: entry.name,
      duration: entry.duration,
      startTime: entry.startTime,
    });
  };

  /** Default slow-operation reporter; replace to forward to a backend. */
  reportSlowOperation = (name, duration) => {
    console.warn(`Slow operation "${name}" took ${duration}ms`);
  };

  /** Default slow-resource reporter; replace to forward to a backend. */
  reportSlowResource = (analysis) => {
    console.warn('Slow resource:', analysis);
  };

  /**
   * Collects the recorded marks and measures together with browser timings.
   * @returns {object} `memory` is null when `performance.memory` is unavailable.
   */
  getPerformanceReport = () => {
    return {
      marks: Array.from(this.marks.entries()),
      measures: this.measures,
      navigation: performance.getEntriesByType('navigation')[0],
      resources: performance.getEntriesByType('resource'),
      memory: performance.memory
        ? {
            usedJSHeapSize: performance.memory.usedJSHeapSize,
            totalJSHeapSize: performance.memory.totalJSHeapSize,
            jsHeapSizeLimit: performance.memory.jsHeapSizeLimit,
          }
        : null,
    };
  };
}
6. 用户反馈收集
用户反馈组件
javascript
// User feedback collector
/**
 * Records a page snapshot plus a timeline of user actions, and submits them
 * together with the user's feedback.
 *
 * The original class called `attachEventListeners` / `removeEventListeners`
 * without defining them, so starting or stopping a recording threw. The
 * implementations here record clicks, input events (without values) and
 * scroll positions.
 */
class FeedbackCollector {
  constructor() {
    this.feedbackData = [];
    this.isRecording = false;
    this.recordingStartTime = null;
    this.pageSnapshot = null;
    // { type, handler } pairs currently registered on `document`.
    this.listeners = [];
  }

  /** Starts a new recording, discarding any previously recorded actions. */
  startRecording = () => {
    // Restarting must not leave the previous listeners registered twice.
    this.removeEventListeners();

    this.isRecording = true;
    this.recordingStartTime = Date.now();
    this.feedbackData = [];

    // Snapshot of the page at the start of the recording.
    this.capturePageSnapshot();
    // Listen for user actions.
    this.attachEventListeners();
  };

  /**
   * Stops recording and returns what was captured.
   * @returns {{duration: number, actions: Array, snapshot: (object|null),
   *   url: string, userAgent: string, timestamp: number}} `duration` is 0 when
   *   no recording was ever started.
   */
  stopRecording = () => {
    const now = Date.now();
    this.isRecording = false;
    this.removeEventListeners();

    return {
      // Without a start time, `now - null` would yield the epoch time itself.
      duration: this.recordingStartTime === null ? 0 : now - this.recordingStartTime,
      actions: this.feedbackData,
      snapshot: this.pageSnapshot,
      url: window.location.href,
      userAgent: navigator.userAgent,
      timestamp: now,
    };
  };

  /** Stores the document HTML, readable stylesheet rules and viewport state. */
  capturePageSnapshot = () => {
    this.pageSnapshot = {
      html: document.documentElement.outerHTML,
      styles: Array.from(document.styleSheets)
        .map((sheet) => {
          try {
            return Array.from(sheet.cssRules)
              .map((rule) => rule.cssText)
              .join('\n');
          } catch (e) {
            // Reading cssRules can throw; such sheets contribute an empty
            // string instead of aborting the snapshot.
            return '';
          }
        })
        .join('\n'),
      viewport: {
        width: window.innerWidth,
        height: window.innerHeight,
        scrollX: window.scrollX,
        scrollY: window.scrollY,
      },
    };
  };

  /** Registers capture-phase listeners on `document` that feed recordAction(). */
  attachEventListeners = () => {
    const describeTarget = (target) => ({
      tagName: target?.tagName,
      id: target?.id,
      className: target?.className,
    });

    const handlers = {
      click: (event) =>
        this.recordAction('click', {
          target: describeTarget(event.target),
          x: event.clientX,
          y: event.clientY,
        }),
      // Field values are deliberately not recorded: they may be sensitive.
      input: (event) => this.recordAction('input', { target: describeTarget(event.target) }),
      scroll: () =>
        this.recordAction('scroll', { scrollX: window.scrollX, scrollY: window.scrollY }),
    };

    this.listeners = Object.entries(handlers).map(([type, handler]) => {
      document.addEventListener(type, handler, true);
      return { type, handler };
    });
  };

  /** Unregisters everything attachEventListeners() registered. */
  removeEventListeners = () => {
    this.listeners.forEach(({ type, handler }) => {
      document.removeEventListener(type, handler, true);
    });
    this.listeners = [];
  };

  /**
   * Appends one action to the timeline; ignored while not recording.
   * @param {string} type - Action name, e.g. 'click'.
   * @param {object} data - Action details.
   */
  recordAction = (type, data) => {
    if (!this.isRecording) return;

    this.feedbackData.push({
      type,
      data,
      // Offset from the start of the recording, in milliseconds.
      timestamp: Date.now() - this.recordingStartTime,
    });
  };

  /**
   * Stops the recording and posts it with the user's feedback.
   * @param {*} userFeedback - Whatever the feedback form collected.
   * @returns {Promise<Response>} The pending `fetch` call.
   */
  submitFeedback = (userFeedback) => {
    const recordingData = this.stopRecording();
    const feedback = {
      ...recordingData,
      userFeedback,
      userId: getCurrentUserId(),
      sessionId: getSessionId(),
    };

    return fetch('/api/feedback', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(feedback),
    });
  };
}
7. 最佳实践总结
监控策略
- 分层监控:应用层、网络层、基础设施层
- 关键指标:错误率、响应时间、吞吐量、可用性
- 实时告警:设置合理的阈值和告警规则
- 趋势分析:长期数据分析,发现潜在问题
问题定位流程
- 快速响应:收到告警后立即响应
- 现象分析:收集错误信息、用户反馈
- 范围确定:确定影响范围和严重程度
- 根因分析:通过日志、监控数据定位根本原因
- 解决方案:制定和执行解决方案
- 验证修复:确认问题已解决
- 复盘总结:分析问题原因,改进流程
工具推荐
- 错误监控:Sentry、Bugsnag、Rollbar
- 性能监控:New Relic、Datadog、Pingdom
- 日志管理:ELK Stack、Splunk、Fluentd
- APM工具:AppDynamics、Dynatrace、Zipkin
- 用户体验:FullStory、LogRocket、Hotjar
通过建立完善的监控体系、实时告警机制和高效的问题定位流程,可以大大提高线上问题的发现和解决效率。