狂欢‰一夜 发表于 2018-12-28 07:31:19

squid内存监控脚本

  oom_of_squid用于监控服务器上的squid进程,保证对用户服务的稳定。此脚本设计为以守护进程模式运行,可以这样使用:./oom_of_squid &
  主要功能:
  监控配置过的每个squid进程,保证它的内存占用看似正常;
  确保进程PID和PID文件一致;
  squid进程挂掉后启动之;
  squid进程占用的内存超过设置的阈值则重启之;
  系统可用内存低于阈值,则选择一个squid进程重启之;
  默认每3秒检查一次;
  注意事项:
  `echo squid7.conf`在gentoo和CentOS上的行为不一样,到CentOS下应用可能需要改为`ls squid7.conf`;
  /proc/$PID/comm 在较新的内核上才有这个接口,使用旧内核需要另想办法;
  /etc/init.d/squid_multi_instance是我重写过的进程管理脚本(还有bug),使用“标准”脚本的人需要自己改一下;
  系统可用内存阈值应该适当高于系统自身的oom阈值;
  用于全内存式缓存更好,因为不用顾忌状态文件的同步。
  我设置的配置文件中,squid7.conf是carp-child,用于缓存文件;squid8.conf是carp-parent,用于决定URL在集群中的分布。
  附件为脚本。望用的上的同仁多提宝贵意见!!!
  #!/bin/bash
  # Purpose:
  #    Pick one squid process and restart it when the system's available
  #    memory gets very low.
  #    Check the health of every configured squid process.

  # --- mail notification ---
  MAIL='gongfan193@gmail.com'      # administrator mailbox; set your own
  MAIL_FROM='oom_of_squid@gwbnsh.net.cn'

  # --- timing and thresholds (all memory values are in MB) ---
  CHECK_INTERVAL='3s'              # pause between two checks
  THRESHOLD_LOW_MEM='90'           # low-memory threshold; must be greater than vm.min_free_kbytes
  THRESHOLD_CHILD_MAX_MEM='200'    # a child holding more memory than this is restarted
  THRESHOLD_EMERG='100'            # restart threshold used while LEVEL="emerg"

  # Memory level; set to "emerg" when available memory is low.
  # NOTE(review): the original comment said this should normally be an empty
  # variable, yet it is initialised to "normal" -- confirm which is intended.
  LEVEL='normal'
  LOG="${0}.log"

  # Probe the configuration files; ports 70-79 are parents, 80-87 are children.
  # NOTE(review): the accompanying write-up describes squid7 as the carp child
  # and squid8 as the carp parent, which contradicts the variable names here,
  # and the file names may originally have been glob patterns -- confirm.
  CONFIG_LOCATION='/etc/squid/'
  PARENT_CONF="$(cd "${CONFIG_LOCATION}"; echo squid7.conf)"
  CHILD_CONF="$(cd "${CONFIG_LOCATION}"; echo squid8.conf)"
  ALL_CONF="${PARENT_CONF} ${CHILD_CONF}"

  # Writes -17 to this shell's oom_adj (original comment: "disable killing by oom").
  echo '-17' >/proc/self/oom_adj
  # Print the amount of memory that is free or cheaply reclaimable, in MB.
  #
  # The value is MemFree + Buffers + Cached taken from a meminfo-format file.
  # Parsing `free -m` by column position is avoided because newer procps
  # releases changed the columns of the "Mem:" row (the 6th and 7th fields
  # became "buff/cache" and "available"), which makes a positional sum
  # count the same memory twice.
  #
  # Arguments: $1 - optional path to a meminfo-format file
  #                 (default: /proc/meminfo)
  # Outputs:   one integer (MB, truncated) on stdout
  all_free_mem() {
    awk '
      $1 == "MemFree:" { free_kb = $2 }
      $1 == "Buffers:" { buffers_kb = $2 }
      $1 == "Cached:"  { cached_kb = $2 }
      END { printf "%d\n", (free_kb + buffers_kb + cached_kb) / 1024 }
    ' "${1:-/proc/meminfo}"
  }
  # Measure the resident memory of the squid instance named by $CONF.
  #
  # Globals read:    CONF (config file name, e.g. squid7.conf), LOG
  # Globals written: INSTANCE, PID_FILE, PID_FROM_FILE,
  #                  RSS (VmRSS in MB; 0 when it cannot be determined)
  # Returns:         0 when RSS was read from a process whose comm contains
  #                  "squid", 1 otherwise (a line is appended to $LOG)
  squid_instance_mem() {
    INSTANCE=${CONF%.conf}
    PID_FILE="/var/run/$INSTANCE.pid"
    PID_FROM_FILE=""
    # Default to 0 so numeric comparisons in callers never see an empty value.
    RSS=0

    if [[ -e $PID_FILE ]]; then
      PID_FROM_FILE=$(cat "$PID_FILE" 2>/dev/null)
    fi

    # Check that the PID file names a process that really is squid. A missing
    # file, a non-numeric PID or a foreign process all count as a mismatch.
    if [[ ! $PID_FROM_FILE =~ ^[0-9]+$ ]] \
      || ! grep -q squid "/proc/$PID_FROM_FILE/comm" 2>/dev/null; then
      echo "$INSTANCE pid file not match to squid" >> "$LOG"
      return 1
    fi

    # Resident set size: /proc reports kB, convert to MB.
    RSS=$(awk '/^VmRSS:/ {printf "%d\n", $2/1024}' \
      "/proc/$PID_FROM_FILE/status" 2>/dev/null)
    RSS=${RSS:-0}
  }
  # Check every configured squid instance and repair what is broken:
  # start it when no process is found, and rewrite its PID file when the file
  # is empty/missing or does not name one of the running processes.
  #
  # Globals read:    ALL_CONF, LOG
  # Globals written: CONF, INSTANCE, PID_FILE, STATE_FILE, PID_FROM_FILE,
  #                  PID_RUNNING, process_state (unset again before returning)
  # Calls:           notify_admin (in the background) and the
  #                  /etc/init.d/squid_multi_instance init script
  keep_processes_health() {
    local running_pids

    for CONF in $ALL_CONF; do
      INSTANCE=${CONF%.conf}
      PID_FILE="/var/run/$INSTANCE.pid"
      STATE_FILE="/var/lib/init.d/started/$INSTANCE"

      PID_FROM_FILE=""
      if [[ -e $PID_FILE ]]; then
        PID_FROM_FILE=$(cat "$PID_FILE")
      fi

      # ps lines that start with "squid" (the user column) and mention this
      # instance's config file. The name is matched literally with index():
      # splicing it into a regex would let "." match any character.
      running_pids=$(ps axo user,pid,cmd \
        | awk -v conf="$CONF" '/^squid/ && index($0, conf) {print $2}')
      # Several processes may match; the first one is used when the PID file
      # has to be rewritten.
      PID_RUNNING=${running_pids%%$'\n'*}

      if [[ -z $PID_RUNNING ]]; then
        # No process: clear leftovers and start the instance.
        process_state="not_running"
        echo "$(date '+%F %T') $INSTANCE state is $process_state, restarted" >> "$LOG"
        /bin/rm -f -- "$PID_FILE" "$STATE_FILE"
        # Left unquoted on purpose: an empty suffix must pass no argument.
        # shellcheck disable=SC2086
        /etc/init.d/squid_multi_instance start ${INSTANCE#squid} >/dev/null 2>&1
        notify_admin &
      elif [[ -z $PID_FROM_FILE ]]; then
        # PID file missing or empty: rewrite it.
        process_state="bad_pid_file"
        if echo "$PID_RUNNING" > "$PID_FILE"; then
          process_state="good"
          echo "fixed pid file of $INSTANCE at $(date '+%F %T')" >> "$LOG"
        else
          echo "can not write $PID_FILE" >> "$LOG"
          process_state="pid_file_not_writeable"
          notify_admin &
        fi
      elif grep -qxF -- "$PID_FROM_FILE" <<<"$running_pids"; then
        # The PID file names one of the running processes: healthy.
        process_state="good"
      else
        # The PID file names a process that is not this instance.
        process_state="pid_not_equal"
        if echo "$PID_RUNNING" > "$PID_FILE"; then
          echo "fixed $process_state of $INSTANCE at $(date '+%F %T')" >> "$LOG"
          notify_admin &
        else
          echo "can not write $PID_FILE" >> "$LOG"
          process_state="$process_state pid_file_not_writeable"
          notify_admin &
        fi
      fi
    done
    unset process_state
  }
  # Restart the squid instance named by $CONF through the init script, then
  # notify the administrator in the background.
  # Globals read:    CONF
  # Globals written: INSTANCE
  restart_process() {
    INSTANCE="${CONF%.conf}"
    {
      # The init script receives the instance name without its "squid" prefix.
      /etc/init.d/squid_multi_instance restart ${INSTANCE#squid}
    } >/dev/null 2>&1
    notify_admin &
  }
  # Restart the parent instance that currently holds the most memory.
  #
  # Globals read:    PARENT_CONF, LOG
  # Globals written: CONF, PARENT_MEM, PARENT_MEM_MAX (plus whatever
  #                  squid_instance_mem and restart_process set)
  # Calls:           squid_instance_mem, restart_process
  pick_and_restart_parent() {
    # Start from an empty list on every call; otherwise measurements from
    # earlier calls would linger and could win the comparison.
    PARENT_MEM=""
    for CONF in $PARENT_CONF; do
      squid_instance_mem
      # One "<rss> <conf>" line per instance; an unknown RSS counts as 0.
      PARENT_MEM+="${RSS:-0} ${CONF}"$'\n'
    done

    PARENT_MEM_MAX=$(printf '%s' "$PARENT_MEM" | sort -n | tail -n 1)

    # Must be a quoted/[[ ]] test: the value contains a space, and an
    # unquoted `[ -n ... ]` fails on it and skips the restart.
    if [[ -n $PARENT_MEM_MAX ]]; then
      CONF=${PARENT_MEM_MAX#* }
      restart_process
      echo "restarted parent ${CONF%.conf} at $(date '+%F %T')" >> "$LOG"
    else
      echo "unknow error in pick_and_restart_parent" >> "$LOG"
    fi
  }
  # Restart child instances that hold too much memory.
  #
  # Every child above THRESHOLD_CHILD_MAX_MEM (MB) is restarted. When LEVEL is
  # "emerg", a second pass also restarts every child above THRESHOLD_EMERG.
  #
  # Globals read:    CHILD_CONF, THRESHOLD_CHILD_MAX_MEM, THRESHOLD_EMERG,
  #                  LEVEL, LOG
  # Globals written: CONF, process_state (first pass only)
  # Calls:           squid_instance_mem, restart_process
  # Returns:         0
  pick_and_restart_child() {
    for CONF in $CHILD_CONF; do
      squid_instance_mem
      # An undetermined RSS counts as 0 rather than breaking the comparison.
      if [[ ${RSS:-0} -gt $THRESHOLD_CHILD_MAX_MEM ]]; then
        process_state="over_THRESHOLD_CHILD_MAX_MEM"
        echo "$process_state restarted child ${CONF%.conf} at $(date '+%F %T')" >> "$LOG"
        restart_process
      fi
    done

    if [[ $LEVEL == "emerg" ]]; then
      for CONF in $CHILD_CONF; do
        squid_instance_mem
        if [[ ${RSS:-0} -gt $THRESHOLD_EMERG ]]; then
          # Subshell: the emergency state is visible to restart_process but
          # does not replace the caller's process_state.
          (
            process_state="over_THRESHOLD_EMERG"
            echo "$process_state restarted child ${CONF%.conf} at $(date '+%F %T')" >> "$LOG"
            restart_process
          )
        fi
      done
    fi
    return 0
  }
  # Free memory in escalating steps: children first, then one parent, and
  # finally the "child" target of the init script if memory is still low.
  # Globals read: THRESHOLD_LOW_MEM
  # Calls:        pick_and_restart_child, all_free_mem,
  #               pick_and_restart_parent, restart_process
  pick_and_restart_one() {
    # Step 1: children are restarted first.
    pick_and_restart_child

    # Step 2: memory still low, restart a parent.
    if [[ $(all_free_mem) -lt $THRESHOLD_LOW_MEM ]]; then
      pick_and_restart_parent
    fi

    # Step 3: memory still low, restart all children. Otherwise stop here,
    # handing back the status of the failed comparison.
    [[ $(all_free_mem) -lt $THRESHOLD_LOW_MEM ]] || return
    ( CONF="child"; restart_process )
  }
  # notify_admin: invoked in the background by the functions above; the one
  # visible command hands a message to sendmail using $MAIL_FROM and $LOG.
  # NOTE(review): this region is incomplete as shown. The function has no
  # closing brace, and the else/fi/done lines below have no matching
  # if/while opener. Presumably the mail body (possibly a here-document) and
  # the head of the main loop were lost when the script was pasted -- recover
  # them from the original attachment before running this file.
  notify_admin() {
  sendmail -t -f $MAIL_FROM$LOG
  # NOTE(review): the lines from here on read like the tail of the main loop
  # (restart something when memory is low, otherwise reset LEVEL and sleep
  # for CHECK_INTERVAL) rather than part of notify_admin -- confirm.
  pick_and_restart_one
  else
  LEVEL="normal"
  sleep $CHECK_INTERVAL
  fi
  done
  # vim: set sw=4 ts=4:

页: [1]
查看完整版本: squid内存监控脚本