Java资源持续监控

说明:通过 Java 类名定时循环监控 Java 进程的资源占用以及机器内存使用情况,并将结果写入文件。

例如:每 60s 记录 Spark 执行器资源使用情况:

./java_resource_monitor.sh CoarseGrainedExecutorBackend logs 60

结果:在 logs 目录中,以 PID 为文件名记录每个执行器的资源使用情况,并将机器内存使用情况写入 free.log 中。

脚本内容:

#!/bin/bash

# ============================================
# Java process resource monitor with a built-in sampling loop.
# Works on hosts without jps (falls back to ps).
#
# Usage:
#   <script> <process_name> <output_directory> [interval_seconds] [print_to_console]
#
#   process_name      case-insensitive pattern matched against jps / ps output
#   output_directory  created if missing; receives one file per PID plus free.log
#   interval_seconds  pause between samples, passed to sleep (default: 5)
#   print_to_console  "true" to also print the latest sample each round
#                     (default: false)
#
# Runs until interrupted with Ctrl+C.
# ============================================

set -e

# All relative paths (including a relative output directory) are resolved
# against the directory that contains this script.
cd "$(dirname "$0")"

if [ $# -lt 2 ]; then
  echo "Usage: $0 <process_name> <output_directory> [interval_seconds] [print_to_console]"
  exit 1
fi

PROCESS_NAME=$1
OUTPUT_DIR=$2
INTERVAL=${3:-5}  # sample every 5 seconds by default

# Normalise the optional flag to exactly "true" or "false". An omitted argument
# or an explicit false-like value disables console output; any other value
# enables it, so callers that used to pass e.g. "1" keep working.
case "${4:-false}" in
  false|False|FALSE|0|no|No|NO)
    PRINT_TO_CONSOLE=false
    ;;
  *)
    PRINT_TO_CONSOLE=true
    ;;
esac

# PID of this script. Captured before the banner so it can be displayed, and
# used later to skip the script itself when the ps fallback matches it.
CURRENT_PID=$$

mkdir -p "$OUTPUT_DIR"

# Resolve command paths once (PATH may be incomplete in non-interactive
# environments). Under set -e a missing tool aborts the script here.
PS_CMD=$(command -v ps)
GREP_CMD=$(command -v grep)
AWK_CMD=$(command -v awk)
FREE_CMD=$(command -v free)
DATE_CMD=$(command -v date)
TAIL_CMD=$(command -v tail)

# Column header shared by the per-PID files and the console summary.
PROC_HEADER="TIME                 USER       PID        CPU%       MEM%       MEM_GB     MEM_KB     VSZ_KB     CMD"

echo "========================================="
echo " Java Resource Watcher"
echo " Process Name : $PROCESS_NAME"
echo " Output Dir   : $OUTPUT_DIR"
echo " Interval     : ${INTERVAL}s"
echo " Print Console: $PRINT_TO_CONSOLE"
echo " CURRENT_PID  : $CURRENT_PID"
echo "========================================="
echo ""

# Exit cleanly on Ctrl+C.
trap 'echo ""; echo "Stopped by user."; exit 0' INT

while true; do
  CURRENT_TIME=$("$DATE_CMD" +"%Y-%m-%d %H:%M:%S")

  # ---------- Locate matching Java processes ----------
  # Prefer jps when available; otherwise scan the ps listing for "java"
  # entries. The '[j]ava' pattern keeps the grep process itself out of
  # the result.
  if command -v jps >/dev/null 2>&1; then
    pids=$(jps | "$GREP_CMD" -i -- "$PROCESS_NAME" | "$AWK_CMD" '{print $1}')
  else
    pids=$("$PS_CMD" -eo pid,cmd \
      | "$GREP_CMD" '[j]ava' \
      | "$GREP_CMD" -i -- "$PROCESS_NAME" \
      | "$AWK_CMD" '{print $1}')
  fi

  if [ -z "$pids" ]; then
    echo "[$CURRENT_TIME] ⚠️  No Java process found for name: $PROCESS_NAME"
  else
    # ---------- Record resources of every matching process ----------
    # $pids is intentionally unquoted: it is a whitespace-separated PID list.
    for pid in $pids; do
      OUTPUT_FILE="$OUTPUT_DIR/$pid"

      # Never record the monitor script itself.
      if [ "$pid" = "$CURRENT_PID" ]; then
        continue
      fi

      if [ -f "/proc/$pid/cmdline" ]; then
        # /proc/<pid>/cmdline is NUL-separated; turn it into a space-separated
        # string. Going through a pipeline means a process that vanished in
        # the meantime yields an empty string instead of tripping set -e.
        cmdline=$(cat "/proc/$pid/cmdline" 2>/dev/null | tr '\0' ' ')
        case "$cmdline" in
          *"java "*)
            # Command line contains a "java " word: treat it as a real JVM.
            if [ ! -f "$OUTPUT_FILE" ]; then
              echo "$PROC_HEADER" > "$OUTPUT_FILE"
            fi

            # Fields from ps: $1 user, $2 pid, $3 %cpu, $4 %mem, $5 rss (KB),
            # $6 vsz (KB), $7 comm, $8 first word of args.
            "$PS_CMD" -p "$pid" -o user=,pid=,%cpu=,%mem=,rss=,vsz=,comm=,args= --no-headers | \
            "$AWK_CMD" -v current_time="$CURRENT_TIME" '
            {
              mem_gb = sprintf("%.2f", $5/1024/1024)
              printf "%-20s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %s\n",
                     current_time, $1, $2, $3, $4, mem_gb, $5, $6, $8
            }' >> "$OUTPUT_FILE"
            ;;
          *)
            # Matched by name but not a java command line: skip it.
            continue
            ;;
        esac
      fi
    done
  fi

  # ---------- System memory ----------
  FREE_LOG="$OUTPUT_DIR/free.log"
  if [ ! -f "$FREE_LOG" ]; then
    echo "TIME                TOTAL_GI   USED_GI    FREE_GI    SHARED_GI  BUFFERS_GI CACHE_GI   SWAP_TOTAL-GI SWAP_USED-GI SWAP_FREE-GI" > "$FREE_LOG"
  fi

  # Line 1 of the free output is the header and is ignored, line 2 is the Mem
  # row and line 3 the Swap row; both rows are merged into one record.
  FREE_OUTPUT=$("$FREE_CMD" -g | "$AWK_CMD" '
  NR == 2 {
    printf "%-10s %-10s %-10s %-10s %-10s %-10s %-10s", $2, $3, $4, $5, $6, $7, $8
  }
  NR == 3 {
    printf " %-10s %-10s %-10s", $2, $3, $4
  }')

  echo "$CURRENT_TIME $FREE_OUTPUT" >> "$FREE_LOG"

  # ---------- Console summary ----------
  if [ "$PRINT_TO_CONSOLE" = "true" ]; then
    echo ""
    echo "=== Resource Usage Summary @ $CURRENT_TIME ==="
    if [ -n "$pids" ]; then
      for pid in $pids; do
        if [ -f "$OUTPUT_DIR/$pid" ]; then
          LAST_LINE=$("$TAIL_CMD" -n 1 "$OUTPUT_DIR/$pid")
          echo "Process $pid:"
          echo "  $PROC_HEADER"
          echo "  $LAST_LINE"
        fi
      done
    fi

    if [ -f "$FREE_LOG" ]; then
      LAST_FREE_LINE=$("$TAIL_CMD" -n 1 "$FREE_LOG")
      echo "Memory Status:"
      echo "  TIME           TOTAL_GI   USED_GI    FREE_GI    SHARED_GI  BUFFERS_GI CACHE_GI   SWAP_TOTAL-GI SWAP_USED-GI SWAP_FREE-GI"
      echo "  $LAST_FREE_LINE"
    fi
  fi

  # ---------- Wait for the next sample ----------
  sleep "$INTERVAL"
done
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

非理性地界生物

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值