Skip to content

快速发现和定位线上问题的完整解决方案

1. 建立完善的监控体系

前端监控系统

错误监控

javascript
// Global error capture: listen for `error` events on window and report their details.
window.addEventListener('error', ({ message, filename, lineno, colno, error }) => {
  // Forward the collected details to the monitoring platform.
  sendErrorToMonitoring({
    message,
    filename,
    lineno,
    colno,
    // `error` may be absent on the event, hence the optional chaining.
    stack: error?.stack,
    timestamp: Date.now(),
    userAgent: navigator.userAgent,
    url: window.location.href
  });
});

// Promise error capture: report rejections that reached the global
// `unhandledrejection` event.
window.addEventListener('unhandledrejection', (event) => {
  const { reason } = event;

  // A rejection reason can be any value. Convert defensively: String() throws
  // for objects without a usable toString (e.g. Object.create(null)).
  let fallbackMessage;
  try {
    fallbackMessage = String(reason);
  } catch {
    fallbackMessage = Object.prototype.toString.call(reason);
  }

  const errorInfo = {
    type: 'unhandledrejection',
    // Kept as-is for consumers that already read the raw reason.
    reason,
    // Error objects keep `message` and `stack` in non-enumerable properties,
    // which JSON.stringify drops; copy them into plain string fields so they
    // are still present if the payload is serialized.
    message: typeof reason?.message === 'string' ? reason.message : fallbackMessage,
    stack: typeof reason?.stack === 'string' ? reason.stack : undefined,
    timestamp: Date.now(),
    url: window.location.href
  };

  sendErrorToMonitoring(errorInfo);
});

// Resource loading errors: a capture-phase `error` listener on window that
// reports every error event whose target is something other than window.
window.addEventListener('error', (event) => {
  const { target } = event;

  // Events targeting window itself are not treated as resource failures here.
  if (target === window) {
    return;
  }

  sendErrorToMonitoring({
    type: 'resource',
    tagName: target.tagName,
    // Use `src` when it is set, otherwise fall back to `href`.
    src: target.src || target.href,
    timestamp: Date.now()
  });
}, true);

性能监控

javascript
// Page performance monitoring.
/**
 * Collects page-load metrics from `performance.timing` and passes them to
 * `sendPerformanceData`.
 *
 * `timing.loadEventEnd` stays 0 until the load event has finished, so reading
 * it earlier would produce large negative durations. In that case collection
 * is deferred: to the `load` event if the document is still loading, or to the
 * next macrotask if the load handlers are currently running.
 *
 * Does nothing when the Performance API or `performance.timing` is unavailable.
 */
const performanceMonitor = () => {
  if (!('performance' in window) || !performance.timing) {
    return;
  }

  const { timing } = performance;

  if (timing.loadEventEnd === 0) {
    const retry = () => setTimeout(performanceMonitor, 0);
    if (document.readyState === 'complete') {
      // Load handlers are in progress; loadEventEnd is filled in once they return.
      retry();
    } else {
      window.addEventListener('load', retry, { once: true });
    }
    return;
  }

  const metrics = {
    // Total page load time
    loadTime: timing.loadEventEnd - timing.navigationStart,
    // DNS lookup time
    dnsTime: timing.domainLookupEnd - timing.domainLookupStart,
    // TCP connection time
    tcpTime: timing.connectEnd - timing.connectStart,
    // Time to first byte
    ttfb: timing.responseStart - timing.navigationStart,
    // DOM parsing time
    domParseTime: timing.domContentLoadedEventEnd - timing.domLoading,
    // White-screen time (computed the same way as ttfb)
    whiteScreenTime: timing.responseStart - timing.navigationStart,
    // First-screen time (computed the same way as loadTime)
    firstScreenTime: timing.loadEventEnd - timing.navigationStart
  };

  sendPerformanceData(metrics);
};

// Use PerformanceObserver to monitor additional metrics.
if ('PerformanceObserver' in window) {
  /**
   * Registers an observer for one entry type.
   *
   * Uses the single-`type` form of observe() because `buffered: true` is only
   * accepted there; it delivers entries recorded before the observer was
   * registered. Registration is wrapped in try/catch so a browser that rejects
   * the entry type does not abort the rest of the script.
   */
  const observeEntryType = (type, onEntries) => {
    try {
      const observer = new PerformanceObserver((list) => onEntries(list.getEntries()));
      observer.observe({ type, buffered: true });
    } catch (error) {
      console.warn(`PerformanceObserver cannot observe "${type}":`, error);
    }
  };

  // Monitor LCP (Largest Contentful Paint): report the last entry of each batch.
  observeEntryType('largest-contentful-paint', (entries) => {
    const lastEntry = entries[entries.length - 1];
    if (!lastEntry) {
      return;
    }
    console.log('LCP:', lastEntry.startTime);
    sendMetric('lcp', lastEntry.startTime);
  });

  // Monitor FID (First Input Delay): time between the input and the start of
  // its event processing.
  observeEntryType('first-input', (entries) => {
    entries.forEach((entry) => {
      const delay = entry.processingStart - entry.startTime;
      console.log('FID:', delay);
      sendMetric('fid', delay);
    });
  });
}

用户行为监控

javascript
// User behavior tracking: each method builds a payload and hands it to
// sendBehaviorData together with an event name.
const userBehaviorTracker = {
  // Record a page view for the current location.
  trackPageView() {
    sendBehaviorData('pageview', {
      url: window.location.href,
      title: document.title,
      referrer: document.referrer,
      timestamp: Date.now(),
      userId: getCurrentUserId(),
      sessionId: getSessionId()
    });
  },

  // Record a click on `element`; the element text is cut to 100 characters.
  trackClick(element) {
    const { tagName, className, id, textContent } = element;

    sendBehaviorData('click', {
      tagName,
      className,
      id,
      text: textContent?.slice(0, 100),
      xpath: getXPath(element),
      timestamp: Date.now()
    });
  },

  // Record a form submission. Password values are replaced by '[HIDDEN]';
  // every other value is cut to 50 characters.
  trackFormSubmit(form) {
    const describeField = ({ name, type, value }) => {
      const reportedValue = type === 'password' ? '[HIDDEN]' : value?.slice(0, 50);
      return { name, type, value: reportedValue };
    };

    sendBehaviorData('form_submit', {
      action: form.action,
      method: form.method,
      fields: Array.from(form.elements, describeField),
      timestamp: Date.now()
    });
  }
};

// Automatic event binding: forward document-level click and submit events to
// the tracker, passing the event target.
document.addEventListener('click', ({ target }) => {
  userBehaviorTracker.trackClick(target);
});

document.addEventListener('submit', ({ target }) => {
  userBehaviorTracker.trackFormSubmit(target);
});

2. 日志系统设计

结构化日志

javascript
// Log level definitions. A lower number means higher severity.
// Frozen so the shared enum cannot be mutated by accident at runtime.
const LogLevel = Object.freeze({
  ERROR: 0,
  WARN: 1,
  INFO: 2,
  DEBUG: 3
});

// Logger
/**
 * Structured logger that builds one entry per call and hands it to every
 * configured transport.
 */
class Logger {
  /**
   * @param {object} [options]
   * @param {number} [options.level] - Highest LogLevel value that is still
   *   emitted. Defaults to LogLevel.INFO.
   * @param {object} [options.context] - Fields merged into every entry's `meta`.
   * @param {Array<{log: Function}>} [options.transports] - Receivers of log
   *   entries. Defaults to a single ConsoleTransport.
   */
  constructor(options = {}) {
    // `??` rather than `||`: LogLevel.ERROR is 0, which `||` would treat as
    // "not provided" and silently replace with INFO.
    this.level = options.level ?? LogLevel.INFO;
    this.context = options.context || {};
    this.transports = options.transports || [new ConsoleTransport()];
  }

  // Log at ERROR level.
  error = (message, meta = {}) => {
    this.log(LogLevel.ERROR, message, meta);
  };

  // Log at WARN level.
  warn = (message, meta = {}) => {
    this.log(LogLevel.WARN, message, meta);
  };

  // Log at INFO level.
  info = (message, meta = {}) => {
    this.log(LogLevel.INFO, message, meta);
  };

  // Log at DEBUG level.
  debug = (message, meta = {}) => {
    this.log(LogLevel.DEBUG, message, meta);
  };

  /**
   * Core logging method. Entries whose level value is greater than the
   * configured level are dropped.
   *
   * @param {number} level - One of the LogLevel values.
   * @param {string} message
   * @param {object} [meta] - Per-call fields; they override context fields.
   */
  log = (level, message, meta = {}) => {
    if (level > this.level) return;

    // Resolve the name by value, not by key position, so the result does not
    // depend on the declaration order of LogLevel.
    const levelName = Object.keys(LogLevel).find((name) => LogLevel[name] === level);

    const logEntry = {
      timestamp: new Date().toISOString(),
      level: levelName,
      message,
      meta: { ...this.context, ...meta },
      url: window.location.href,
      userAgent: navigator.userAgent,
      userId: getCurrentUserId(),
      sessionId: getSessionId(),
      traceId: generateTraceId()
    };

    this.transports.forEach((transport) => {
      transport.log(logEntry);
    });
  };
}

// Console transport
/**
 * Writes a log entry to the console method named after the entry's
 * lower-cased level, prefixed with the entry timestamp.
 */
class ConsoleTransport {
  log = ({ level, timestamp, message, meta }) => {
    const consoleMethod = level.toLowerCase();
    const line = `[${timestamp}] ${message}`;
    console[consoleMethod](line, meta);
  };
}

// Remote transport
/**
 * Buffers log entries and POSTs them to `endpoint` as `{ logs: [...] }`,
 * either when `batchSize` entries have accumulated or every `flushInterval`
 * milliseconds.
 */
class RemoteTransport {
  /**
   * @param {string} endpoint - URL that receives the batched logs.
   */
  constructor(endpoint) {
    this.endpoint = endpoint;
    this.buffer = [];
    this.batchSize = 10;
    this.flushInterval = 5000;
    // Upper bound on buffered entries so repeated upload failures cannot grow
    // the buffer without limit; the oldest entries are dropped first.
    this.maxBufferSize = 1000;

    // Keep the timer handle so close() can stop the periodic flush.
    this.timer = setInterval(this.flush, this.flushInterval);
  }

  // Queue one entry; flush immediately once a full batch is buffered.
  log = (entry) => {
    this.buffer.push(entry);

    if (this.buffer.length >= this.batchSize) {
      this.flush();
    }
  };

  // Send everything currently buffered. Failed batches are put back at the
  // front of the buffer so they are retried on a later flush.
  flush = () => {
    if (this.buffer.length === 0) return;

    const logs = this.buffer;
    this.buffer = [];

    fetch(this.endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ logs })
    })
      .then((response) => {
        // fetch() only rejects on network failure; an HTTP error status still
        // resolves, so it has to be turned into a failure explicitly.
        if (!response.ok) {
          throw new Error(`Log endpoint responded with HTTP ${response.status}`);
        }
      })
      .catch((error) => {
        console.error('Failed to send logs:', error);
        // Re-queue ahead of newer entries, keeping only the newest maxBufferSize.
        this.buffer = [...logs, ...this.buffer].slice(-this.maxBufferSize);
      });
  };

  // Stop the periodic flush. Entries still in the buffer are not sent.
  close = () => {
    clearInterval(this.timer);
    this.timer = null;
  };
}

3. 实时告警系统

告警规则配置

javascript
// Alert rule definitions. Every rule uses the same 5-minute evaluation window
// and the 'greater_than' condition.
const ALERT_WINDOW_MS = 300000; // 5 minutes

const greaterThanRule = (threshold) => ({
  threshold,
  window: ALERT_WINDOW_MS,
  condition: 'greater_than'
});

const alertRules = {
  // Error-rate alert: 5%
  errorRate: greaterThanRule(0.05),

  // Response-time alert: 3 seconds
  responseTime: greaterThanRule(3000),

  // Page-load-time alert: 5 seconds
  pageLoadTime: greaterThanRule(5000)
};

// Alert checker
/**
 * Collects metric samples and evaluates them against `alertRules` once a minute.
 *
 * cleanupOldMetrics, isAlertSuppressed, getSeverity and sendNotification were
 * called but not defined in this class, so addMetric() and triggerAlert()
 * threw a TypeError. They are now provided as prototype methods (not instance
 * fields) so that a subclass or a prototype patch can still override them.
 */
class AlertChecker {
  constructor() {
    this.metrics = new Map();
    this.alertHistory = new Map();
    // Minimum time between two alerts of the same rule, in milliseconds.
    // NOTE(review): 5 minutes mirrors the rule window; confirm the desired cooldown.
    this.suppressionWindow = 300000;

    // Check once per minute; keep the handle so stop() can cancel it.
    this.timer = setInterval(this.checkAlerts, 60000);
  }

  /**
   * Add one metric sample.
   * @param {string} type - Metric name; matched against the keys of alertRules.
   * @param {number} value - Sample value. For 'errorRate' a value of 1 marks an error.
   * @param {number} [timestamp] - Epoch milliseconds; defaults to now.
   */
  addMetric = (type, value, timestamp = Date.now()) => {
    if (!this.metrics.has(type)) {
      this.metrics.set(type, []);
    }

    this.metrics.get(type).push({ value, timestamp });

    // Drop samples that no rule window can reach any more.
    this.cleanupOldMetrics(type);
  };

  // Evaluate every rule and trigger the ones that fire and are not suppressed.
  checkAlerts = () => {
    Object.entries(alertRules).forEach(([ruleType, rule]) => {
      const shouldAlert = this.evaluateRule(ruleType, rule);

      if (shouldAlert && !this.isAlertSuppressed(ruleType)) {
        this.triggerAlert(ruleType, rule);
      }
    });
  };

  /**
   * Evaluate one rule over the samples inside its time window.
   * 'errorRate' uses the share of samples whose value is 1; every other rule
   * uses the arithmetic mean of the sample values.
   * @returns {boolean} true when the rule's condition is met; false when the
   *   window holds no samples.
   */
  evaluateRule = (ruleType, rule) => {
    const metrics = this.metrics.get(ruleType) || [];
    const windowStart = Date.now() - rule.window;

    const recentMetrics = metrics.filter((m) => m.timestamp >= windowStart);

    if (recentMetrics.length === 0) return false;

    let value;
    if (ruleType === 'errorRate') {
      const errors = recentMetrics.filter((m) => m.value === 1).length;
      value = errors / recentMetrics.length;
    } else {
      value = recentMetrics.reduce((sum, m) => sum + m.value, 0) / recentMetrics.length;
    }

    return rule.condition === 'greater_than' ? value > rule.threshold : value < rule.threshold;
  };

  // Build the alert payload, send it and remember when this rule last fired.
  triggerAlert = (ruleType, rule) => {
    const alert = {
      type: ruleType,
      threshold: rule.threshold,
      timestamp: Date.now(),
      severity: this.getSeverity(ruleType),
      message: `${ruleType} 超过阈值 ${rule.threshold}`
    };

    // Send the alert.
    this.sendAlert(alert);

    // Record alert history (read by isAlertSuppressed).
    this.alertHistory.set(ruleType, Date.now());
  };

  // Send the alert to the alert platform and to the notification hook.
  sendAlert = (alert) => {
    fetch('/api/alerts', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(alert)
    }).catch((error) => {
      // Without this handler a failed request surfaces as an unhandled rejection.
      console.error('Failed to send alert:', error);
    });

    // Email / SMS / chat notifications, etc.
    this.sendNotification(alert);
  };

  // Cancel the periodic check started by the constructor.
  stop = () => {
    clearInterval(this.timer);
    this.timer = null;
  };

  /**
   * Remove samples of `type` that are older than the rule window for that
   * type. Types without a rule fall back to the largest configured window.
   */
  cleanupOldMetrics(type) {
    const samples = this.metrics.get(type);
    if (!samples) return;

    const largestWindow = Math.max(0, ...Object.values(alertRules).map((rule) => rule.window));
    const windowMs = alertRules[type]?.window ?? largestWindow;
    const cutoff = Date.now() - windowMs;

    this.metrics.set(type, samples.filter((sample) => sample.timestamp >= cutoff));
  }

  /**
   * @returns {boolean} true when this rule already fired within the last
   *   `suppressionWindow` milliseconds.
   */
  isAlertSuppressed(ruleType) {
    const lastAlertAt = this.alertHistory.get(ruleType);
    return lastAlertAt !== undefined && Date.now() - lastAlertAt < this.suppressionWindow;
  }

  /**
   * Severity label attached to an alert.
   * NOTE(review): the mapping is a placeholder default (error rate is
   * 'critical', everything else 'warning'); confirm against the alert platform.
   */
  getSeverity(ruleType) {
    return ruleType === 'errorRate' ? 'critical' : 'warning';
  }

  /**
   * Hook for out-of-band notifications (email, SMS, chat). Intentionally a
   * no-op by default; override it to wire up a real channel.
   */
  sendNotification(alert) {}
}

4. 调试工具集成

远程调试工具

javascript
// Remote debugging console
/**
 * Executes commands received over a WebSocket and sends back their results.
 *
 * flushCommandQueue, sendError, getConsoleHistory and getNetworkRequests were
 * called but not defined, so opening the socket threw a TypeError and the
 * error path of executeCommand threw as well. They are now provided as
 * prototype methods so a subclass can still override them.
 *
 * SECURITY(review): the 'eval' command runs arbitrary code received from the
 * socket. Anyone who controls the WebSocket endpoint can execute script in
 * the page. Only enable this class in trusted, authenticated debugging setups.
 */
class RemoteDebugger {
  /**
   * @param {string} wsUrl - URL of the debugging WebSocket server.
   */
  constructor(wsUrl) {
    this.ws = new WebSocket(wsUrl);
    // Commands waiting for the socket to open; drained by flushCommandQueue().
    // NOTE(review): nothing in this class enqueues commands; presumably
    // callers push here before the connection is ready.
    this.commandQueue = [];
    // Returned by getConsoleHistory(). NOTE(review): nothing in this class
    // fills it; the host application has to record console output into it.
    this.consoleHistory = [];

    this.ws.onopen = () => {
      console.log('Remote debugger connected');
      this.flushCommandQueue();
    };

    this.ws.onmessage = (event) => {
      let command;
      try {
        command = JSON.parse(event.data);
      } catch (error) {
        // A malformed frame carries no usable id; report it with id null
        // instead of letting the handler throw.
        this.sendError(null, `Invalid command payload: ${error.message}`);
        return;
      }
      this.executeCommand(command);
    };
  }

  /**
   * Execute one remote command and reply with its result or error.
   * @param {{id: *, type: string, code?: string, selector?: string}} command
   */
  executeCommand = (command) => {
    // `command` may be null or a primitive after JSON.parse; read it defensively
    // so the catch block below can always report an id.
    const commandId = command?.id;

    try {
      let result;

      switch (command?.type) {
        case 'eval':
          // SECURITY(review): executes untrusted remote input; see class comment.
          result = eval(command.code);
          break;
        case 'getElement':
          result = document.querySelector(command.selector);
          break;
        case 'getConsoleHistory':
          result = this.getConsoleHistory();
          break;
        case 'getNetworkRequests':
          result = this.getNetworkRequests();
          break;
        default:
          result = 'Unknown command';
      }

      this.sendResult(commandId, result);
    } catch (error) {
      this.sendError(commandId, error.message);
    }
  };

  // Send a successful result. `data` holds the JSON string built by serializeResult.
  sendResult = (commandId, result) => {
    this.ws.send(JSON.stringify({
      id: commandId,
      type: 'result',
      data: this.serializeResult(result)
    }));
  };

  /**
   * Serialize a result to a JSON string. DOM elements are replaced by a small
   * summary object. If serialization throws, String(obj) is returned instead.
   */
  serializeResult = (obj) => {
    try {
      return JSON.stringify(obj, (key, value) => {
        if (value instanceof Element) {
          return {
            tagName: value.tagName,
            className: value.className,
            id: value.id,
            innerHTML: value.innerHTML.slice(0, 200)
          };
        }
        return value;
      });
    } catch (error) {
      return String(obj);
    }
  };

  // Send a failure reply for the given command id.
  sendError(commandId, message) {
    this.ws.send(JSON.stringify({
      id: commandId,
      type: 'error',
      message
    }));
  }

  // Execute, in order, every command queued before the socket opened.
  flushCommandQueue() {
    while (this.commandQueue.length > 0) {
      this.executeCommand(this.commandQueue.shift());
    }
  }

  // Return a copy of the recorded console history.
  getConsoleHistory() {
    return [...this.consoleHistory];
  }

  // Return the resource timing entries the browser has recorded for this page.
  getNetworkRequests() {
    return performance.getEntriesByType('resource');
  }
}

5. 性能分析工具

性能瓶颈检测

javascript
// Performance analyzer
/**
 * Records named marks and measures, watches long tasks and resource loads,
 * and produces a summary report.
 *
 * reportLongTask, reportSlowOperation, reportSlowResource and getResourceType
 * were called but not defined, so every observed resource entry threw a
 * TypeError. They are now provided as prototype methods so a subclass can
 * override them. The report* defaults only log a console warning.
 * NOTE(review): wire the report* hooks to the real monitoring sink.
 */
class PerformanceAnalyzer {
  constructor() {
    this.marks = new Map();
    this.measures = [];
    this.observers = [];

    this.initObservers();
  }

  // Register the performance observers, if the browser supports them.
  initObservers = () => {
    if (!('PerformanceObserver' in window)) {
      return;
    }

    // Registration is guarded per entry type: a browser that rejects one type
    // must not prevent the other observer from being set up.
    const watch = (entryType, onEntry) => {
      try {
        const observer = new PerformanceObserver((list) => {
          list.getEntries().forEach(onEntry);
        });
        observer.observe({ entryTypes: [entryType] });
        // Keep the observer so disconnect() can stop it later.
        this.observers.push(observer);
      } catch (error) {
        console.warn(`PerformanceObserver cannot observe "${entryType}":`, error);
      }
    };

    // Monitor long tasks.
    watch('longtask', (entry) => {
      if (entry.duration > 50) {
        this.reportLongTask(entry);
      }
    });

    // Monitor resource loading.
    watch('resource', (entry) => {
      this.analyzeResource(entry);
    });
  };

  /**
   * Record a named mark.
   * @param {string} name
   * @returns {number} The performance.now() timestamp stored for the mark.
   */
  mark = (name) => {
    const timestamp = performance.now();
    this.marks.set(name, timestamp);
    performance.mark(name);
    return timestamp;
  };

  /**
   * Measure the time between two previously recorded marks.
   * @returns {number|undefined} Duration in milliseconds, or undefined when
   *   either mark is unknown.
   */
  measure = (name, startMark, endMark) => {
    const startTime = this.marks.get(startMark);
    const endTime = this.marks.get(endMark);

    // Compare against undefined: a mark taken at timestamp 0 is valid but falsy.
    if (startTime === undefined || endTime === undefined) {
      return undefined;
    }

    const duration = endTime - startTime;
    this.measures.push({ name, duration, startTime, endTime });
    performance.measure(name, startMark, endMark);

    // Report operations that took longer than one second.
    if (duration > 1000) {
      this.reportSlowOperation(name, duration);
    }

    return duration;
  };

  /**
   * Summarize one resource timing entry and report it when it took more than
   * two seconds.
   * @returns {object} The analysis object.
   */
  analyzeResource = (entry) => {
    const analysis = {
      name: entry.name,
      duration: entry.duration,
      size: entry.transferSize,
      type: this.getResourceType(entry.name),
      // NOTE(review): a transferSize of 0 is treated as a cache hit; confirm
      // this holds for cross-origin resources in the target browsers.
      cached: entry.transferSize === 0,
      slow: entry.duration > 2000
    };

    if (analysis.slow) {
      this.reportSlowResource(analysis);
    }

    return analysis;
  };

  // Build a report from the recorded marks/measures and the browser's own entries.
  getPerformanceReport = () => {
    return {
      marks: Array.from(this.marks.entries()),
      measures: this.measures,
      navigation: performance.getEntriesByType('navigation')[0],
      resources: performance.getEntriesByType('resource'),
      // performance.memory is not available everywhere, hence the null fallback.
      memory: performance.memory ? {
        usedJSHeapSize: performance.memory.usedJSHeapSize,
        totalJSHeapSize: performance.memory.totalJSHeapSize,
        jsHeapSizeLimit: performance.memory.jsHeapSizeLimit
      } : null
    };
  };

  // Stop all observers registered by initObservers().
  disconnect() {
    this.observers.forEach((observer) => observer.disconnect());
    this.observers = [];
  }

  /**
   * Classify a resource by the file extension in its URL path.
   * @param {string} url
   * @returns {'script'|'stylesheet'|'image'|'font'|'other'}
   */
  getResourceType(url) {
    let pathname = String(url);
    try {
      pathname = new URL(pathname).pathname;
    } catch {
      // Not an absolute URL: strip query string and fragment by hand.
      pathname = pathname.split(/[?#]/)[0];
    }

    const extension = /\.([a-z0-9]+)$/i.exec(pathname)?.[1]?.toLowerCase();
    const typesByExtension = {
      js: 'script',
      mjs: 'script',
      css: 'stylesheet',
      png: 'image',
      jpg: 'image',
      jpeg: 'image',
      gif: 'image',
      webp: 'image',
      avif: 'image',
      svg: 'image',
      ico: 'image',
      woff: 'font',
      woff2: 'font',
      ttf: 'font',
      otf: 'font',
      eot: 'font'
    };

    return (extension && Object.hasOwn(typesByExtension, extension))
      ? typesByExtension[extension]
      : 'other';
  }

  // Default long-task report: console warning only.
  reportLongTask(entry) {
    console.warn('Long task detected:', entry.duration, entry.name);
  }

  // Default slow-operation report: console warning only.
  reportSlowOperation(name, duration) {
    console.warn('Slow operation detected:', name, duration);
  }

  // Default slow-resource report: console warning only.
  reportSlowResource(analysis) {
    console.warn('Slow resource detected:', analysis);
  }
}

6. 用户反馈收集

用户反馈组件

javascript
// User feedback collector
/**
 * Records user actions together with a page snapshot and submits them with
 * the user's feedback.
 *
 * attachEventListeners and removeEventListeners were called but not defined,
 * so startRecording() and stopRecording() threw a TypeError. They are now
 * provided as prototype methods so a subclass can override them.
 * NOTE(review): the default listeners record click, input and scroll events;
 * confirm this is the intended set of actions.
 */
class FeedbackCollector {
  constructor() {
    this.feedbackData = [];
    this.isRecording = false;
    this.recordingStartTime = null;
    this.pageSnapshot = null;
    // [eventType, handler] pairs currently attached to document.
    this.listeners = [];
  }

  // Start recording user actions; any earlier recording data is discarded.
  startRecording = () => {
    this.isRecording = true;
    this.recordingStartTime = Date.now();
    this.feedbackData = [];

    // Capture a page snapshot.
    this.capturePageSnapshot();

    // Listen for user actions.
    this.attachEventListeners();
  };

  /**
   * Stop recording and return the collected data.
   * @returns {{duration: number, actions: Array, snapshot: object|null,
   *   url: string, userAgent: string, timestamp: number}}
   */
  stopRecording = () => {
    this.isRecording = false;
    this.removeEventListeners();

    const finishedAt = Date.now();

    return {
      // Without a preceding startRecording() there is no start time;
      // report 0 instead of subtracting null (which yields the epoch timestamp).
      duration: this.recordingStartTime === null ? 0 : finishedAt - this.recordingStartTime,
      actions: this.feedbackData,
      snapshot: this.pageSnapshot,
      url: window.location.href,
      userAgent: navigator.userAgent,
      timestamp: finishedAt
    };
  };

  // Capture the document markup, readable stylesheet rules and viewport state.
  capturePageSnapshot = () => {
    this.pageSnapshot = {
      html: document.documentElement.outerHTML,
      styles: Array.from(document.styleSheets).map((sheet) => {
        try {
          return Array.from(sheet.cssRules).map((rule) => rule.cssText).join('\n');
        } catch (e) {
          // Reading cssRules can throw for some sheets; contribute nothing for them.
          return '';
        }
      }).join('\n'),
      viewport: {
        width: window.innerWidth,
        height: window.innerHeight,
        scrollX: window.scrollX,
        scrollY: window.scrollY
      }
    };
  };

  /**
   * Record one user action; ignored while not recording.
   * The stored timestamp is relative to the start of the recording.
   */
  recordAction = (type, data) => {
    if (!this.isRecording) return;

    this.feedbackData.push({
      type,
      data,
      timestamp: Date.now() - this.recordingStartTime
    });
  };

  /**
   * Stop recording and POST the recording plus the user's feedback.
   * @param {*} userFeedback - Feedback content supplied by the user.
   * @returns {Promise<Response>} The fetch promise; callers handle failures.
   */
  submitFeedback = (userFeedback) => {
    const recordingData = this.stopRecording();

    const feedback = {
      ...recordingData,
      userFeedback,
      userId: getCurrentUserId(),
      sessionId: getSessionId()
    };

    return fetch('/api/feedback', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(feedback)
    });
  };

  /**
   * Attach capture-phase document listeners that feed recordAction().
   * Input events record only the field's name and type, never its value.
   */
  attachEventListeners() {
    // Avoid duplicate handlers when startRecording() is called twice in a row.
    this.removeEventListeners();

    const describeTarget = (target) => ({
      tagName: target?.tagName,
      id: target?.id,
      className: target?.className
    });

    const handlers = {
      click: (event) => this.recordAction('click', {
        ...describeTarget(event.target),
        x: event.clientX,
        y: event.clientY
      }),
      input: (event) => this.recordAction('input', {
        ...describeTarget(event.target),
        name: event.target?.name,
        inputType: event.target?.type
      }),
      scroll: () => this.recordAction('scroll', {
        scrollX: window.scrollX,
        scrollY: window.scrollY
      })
    };

    this.listeners = Object.entries(handlers);
    this.listeners.forEach(([type, handler]) => {
      document.addEventListener(type, handler, true);
    });
  }

  // Detach every listener attached by attachEventListeners().
  removeEventListeners() {
    (this.listeners ?? []).forEach(([type, handler]) => {
      document.removeEventListener(type, handler, true);
    });
    this.listeners = [];
  }
}

7. 最佳实践总结

监控策略

  1. 分层监控:应用层、网络层、基础设施层
  2. 关键指标:错误率、响应时间、吞吐量、可用性
  3. 实时告警:设置合理的阈值和告警规则
  4. 趋势分析:长期数据分析,发现潜在问题

问题定位流程

  1. 快速响应:收到告警后立即响应
  2. 现象分析:收集错误信息、用户反馈
  3. 范围确定:确定影响范围和严重程度
  4. 根因分析:通过日志、监控数据定位根本原因
  5. 解决方案:制定和执行解决方案
  6. 验证修复:确认问题已解决
  7. 复盘总结:分析问题原因,改进流程

工具推荐

  • 错误监控:Sentry、Bugsnag、Rollbar
  • 性能监控:New Relic、DataDog、Pingdom
  • 日志管理:ELK Stack、Splunk、Fluentd
  • APM工具:AppDynamics、Dynatrace、Zipkin
  • 用户体验:FullStory、LogRocket、Hotjar

通过建立完善的监控体系、实时告警机制和高效的问题定位流程,可以大大提高线上问题的发现和解决效率。

最近更新