系统环境:SUSE Linux Enterprise Server 10 SP1 (x86_64)
问题背景:由于线上系统环境下的crontab内容比较多,在进行日常crontab任务调度时,经常会异常挂掉而影响业务的正常使用,因此结合C和Shell写了一个简单的对crontab进行健康检测的功能。
处理思路:
修改syslog的配置参数,把crontab调度日志单独抽取出来,同时在crontab项里添加检测标记,通过后台守护进程定期检测状态标记来判断当前crontab调度是否正常,同时为了避免日志文件过大而影响性能,会定期对日志文件做切割和清理处理。 #--------------------------------------------------------------------------------------------------------------------------------------------
1、相关目录创建
# mkdir -p /data/logs/crontab
# mkdir -p /data/scripts
# mkdir -p /data/backup/crontab
#--------------------------------------------------------------------------------------------------------------------------------------------
2、crontab健康检测C代码
# cd /data/scripts
# vim check_cron_process.h
/*
 * check_cron_process.h - constants and prototypes shared by the cron
 * health-check daemon (check_cron_process.c).
 */
#ifndef CHECK_CRON_PROCESS_H
#define CHECK_CRON_PROCESS_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>   /* S_I* permission bits used by LOCKMODE */

/* Size of the buffer that receives the first output line of a command. */
#define BUFFSIZE1 1024
/* Size of the buffer that holds a formatted timestamp. */
#define BUFFSIZE2 32

/* PID/lock file used to ensure a single running instance. */
#define LOCKFILE "/var/run/check_cron_process.pid"
/* File the daemon appends its own status messages to. */
#define LOGFILE "/var/log/check_cron_process.log"

/* Permissions for LOCKFILE: rw-r--r-- (0644). */
#define LOCKMODE (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)

/* Prints a line if any process whose ps entry contains the word "cron" exists. */
#define SYSCMD1 "ps aux | grep -w cron | grep -v grep"
/* Prints a line if such a process is marked defunct (zombie). */
#define SYSCMD2 "ps aux | grep -w cron | grep -v grep | grep defunct"
/*
 * Prints a line if one of the last 6 cron log lines is a root job starting
 * with "cd" (the per-minute marker entry installed in root's crontab).
 * The inner double quotes must be escaped to keep this a single literal.
 */
#define SYSCMD3 "tail -6 /data/logs/crontab/cron.log | grep \"(root) CMD (cd\""
/* Force-kills every cron process. */
#define SYSCMD4 "killall -9 cron >/dev/null 2>&1"
/* Starts the cron service. */
#define SYSCMD5 "/sbin/service cron start >/dev/null 2>&1"

/* Exits the process if another instance already holds the lock file. */
void already_running(void);
/* Detaches the process from the terminal and turns it into a daemon. */
void init_daemon(void);
/*
 * Runs a shell command; returns 1 if it printed a non-empty first line,
 * 0 if it printed nothing (or could not be started), -1 if syscmd is NULL.
 */
int run_system_cmd(const char *syscmd);

#endif /* CHECK_CRON_PROCESS_H */
# vim check_cron_process.c
| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219 | #include <stdio.h>#include <stdlib.h>#include <string.h>#include <unistd.h>#include <sys/param.h>#include <sys/stat.h>#include <syslog.h>#include <fcntl.h>#include <errno.h>#include <time.h>#include "check_cron_process.h" static char buffer[BUFFSIZE1] = {0};static char datetime[BUFFSIZE2] = {0}; int get_curr_date(char *strtime, unsigned int ustrlen){ struct tm *pt = NULL; time_t timer; if (!strtime) { return -1; } time(&timer); strtime[0] = " "; pt = localtime(&timer); if (!pt) { return -1; } memset(strtime, 0, ustrlen); sprintf(strtime, "%04d-%02d-%02d-%02d:%02d:%02d", pt->tm_year + 1900, pt->tm_mon + 1, pt->tm_mday, pt->tm_hour, pt->tm_min, pt->tm_sec); return 0;} int writelog(const char *pLoginfo){ FILE *fp = NULL; unsigned int ustrlen = 0; if (pLoginfo == NULL) { return -1; } ustrlen = strlen(pLoginfo); if (ustrlen > 256) { return -1; } if ((fp = fopen(LOGFILE, "a+")) == NULL) { return -1; } memset(datetime, 0, BUFFSIZE2); get_curr_date(datetime, BUFFSIZE2); fprintf(fp, "%s %s", datetime, pLoginfo); fclose(fp); return 0;} int LockFile(int fd){ struct flock fl; fl.l_type = F_WRLCK; fl.l_start = 0; fl.l_whence = SEEK_SET; fl.l_len = 0; return (fcntl(fd, F_SETLK, &fl));} void already_running(void){ int fd = -1; char buf[16] = {0}; fd = open(LOCKFILE, O_RDWR | O_CREAT, LOCKMODE); if (fd < 0) { syslog(LOG_ERR, "can"t open %s: %s", LOCKFILE, strerror(errno)); exit(1); } if (LockFile(fd) < 0) { if (errno == EACCES || errno 
== EAGAIN) { close(fd); exit(1); } syslog(LOG_ERR, "can"t lock %s: %s", LOCKFILE, strerror(errno)); exit(1); } ftruncate(fd, 0); sprintf(buf, "%d", getpid()); write(fd, buf, strlen(buf)); close(fd);} void init_daemon(void){ int pid = -1; if ((pid = fork())) { exit(0); } else if (pid < 0) { exit(1); } setsid(); if ((pid = fork())) { exit(0); } else if (pid < 0) { exit(1); } chdir("/tmp"); umask(0); return;} int run_system_cmd(const char *syscmd){ FILE *fp = NULL; if (syscmd == NULL) { return -1; } memset(buffer, 0, BUFFSIZE1); snprintf(buffer, BUFFSIZE1, syscmd); fp = popen(buffer, "r"); if (!fp) { return 0; } memset(buffer, 0, BUFFSIZE1); if (!fgets(buffer, BUFFSIZE1, fp)) { pclose(fp); return 0; } if (!strncasecmp(buffer, "", BUFFSIZE1)) { pclose(fp); return 0; } pclose(fp); return 1;} int main(int argc, char *argv[]){ int ret = 0; init_daemon(); already_running(); openlog(NULL, LOG_CONS | LOG_PID, LOG_LOCAL1); while(1) { ret = run_system_cmd(SYSCMD1); if (!ret) { writelog("The cron process is not running, now start it!
"); sleep(1); system(SYSCMD5); goto CHECK_CRON; } ret = run_system_cmd(SYSCMD2); if (ret) { writelog("The cron process is defunct, now restart it!
"); sleep(1); system(SYSCMD4); sleep(1); system(SYSCMD5); goto CHECK_CRON; } ret = run_system_cmd(SYSCMD3); if (!ret) { writelog("The cron work is down, now restart it!
"); sleep(1); system(SYSCMD4); sleep(1); system(SYSCMD5); goto CHECK_CRON; } writelog("The cron process is ok!
"); CHECK_CRON: sleep(300); } closelog(); return 0;} |
# vim Makefile
# Builds the statically linked check_cron_process daemon.
CC = gcc
CXX = g++

BINARY = check_cron_process
OBJS = check_cron_process.o

# -O2 is a compile-time option, so it belongs in CFLAGS rather than LDFLAGS.
CFLAGS += -O2 -I/usr/include -I/usr/local/include -Wall -Wno-unused-variable
LDFLAGS = -static

.PHONY: all clean

all: $(BINARY)

$(BINARY): $(OBJS)
	$(CC) $(LDFLAGS) -o $(BINARY) $(OBJS)

# The header is a prerequisite so that editing it triggers a rebuild;
# $< (first prerequisite only) keeps it off the compiler command line.
$(OBJS): %.o: %.c check_cron_process.h
	$(CC) $(CFLAGS) -c $< -o $@

clean:
	rm -f $(BINARY) $(OBJS)
# make
# /data/scripts/check_cron_process
#--------------------------------------------------------------------------------------------------------------------------------------------
3、crontab任务调度日志配置【新增】
# vim /etc/syslog.conf
## check_crontab_start.tag.1
cron.* /data/logs/crontab/cron.log
## check_crontab_end.tag.1
# vim /etc/syslog-ng/syslog-ng.conf
## check_crontab_start.tag.1
destination dst_cron {
    file("/data/logs/crontab/cron.log");
};

log {
    source(src);
    filter(f_cron);
    destination(dst_cron);
};
## check_crontab_end.tag.1
# /sbin/service syslog restart
#--------------------------------------------------------------------------------------------------------------------------------------------
4、crontab任务调度日志处理
(1)、日志切割与清理
# vim /data/scripts/cut_cron_log.sh
#!/bin/bash
# Rotates the dedicated cron log: renames the current log after yesterday's
# date, restarts syslog so that a fresh cron.log is opened, and removes the
# rotated log that is exactly 10 days old.
PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin:/usr/local/sbin

LOGPATH="/data/logs/crontab"

# Count the processes whose ps line mentions sbin/cron; skip rotation when
# cron is not running at all.
cron_count=$(ps aux | grep sbin/cron | grep -v grep | wc -l)
if [ "${cron_count}" -eq 0 ]; then
    echo "The cron process is not running ^_^"
    exit 1
fi

# Archive the current log under yesterday's date.
rotated_name="cron_$(date -d "yesterday" +"%Y-%m-%d").log"
mv ${LOGPATH}/cron.log ${LOGPATH}/${rotated_name}

# syslog must be restarted to reopen cron.log after the rename.
/sbin/service syslog restart

# Drop the archive from 10 days ago.
expired_name="cron_$(date -d "10 days ago" +"%Y-%m-%d").log"
rm -f ${LOGPATH}/${expired_name}
(2)、crontab信息备份
# vim /data/scripts/backup_crontab.sh
#!/bin/bash
# Saves a dated copy of root's crontab and deletes the copy made 10 days ago.
PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin

CRONTAB_BACKUP_DIR="/data/backup/crontab"

mkdir -p "${CRONTAB_BACKUP_DIR}"
crontab -u root -l > "${CRONTAB_BACKUP_DIR}/crontab_$(date +%F)"

CRONBAK="crontab_$(date -d "10 days ago" +"%Y-%m-%d")"
# The ';' that terminates -exec must be escaped, otherwise the shell consumes
# it and find aborts with "missing argument to -exec".
find "${CRONTAB_BACKUP_DIR}" -type f -name "${CRONBAK}" -exec rm -f {} \;
(3)、crontab垃圾头信息清理
# vim /data/scripts/clean_crontab_trash.sh
#!/bin/bash
# Strips the auto-generated header comment lines from root's crontab file.
PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin:/usr/local/sbin

CRONFILE="/var/spool/cron/tabs/root"

# Slashes inside a /.../ address must be escaped, otherwise sed takes the
# first inner '/' as the end of the pattern and rejects the command.
# All deletions run in a single pass so the file is rewritten only once.
sed -i \
    -e '/# DO NOT EDIT THIS FILE/d' \
    -e '/# (\/data\/crontab.tmp/d' \
    -e '/# (\/tmp\/crontab/d' \
    -e '/# (Cron version/d' \
    -e '/# (- installed on/d' \
    -e '/# (\/usr\/local\/agenttools/d' \
    -e '/# (\/tmp\/cron.tmp/d' \
    -e '/# (tmp2 installed/d' \
    -e '/# (crontab.tmp/d' \
    -e '/# (\/data\/crontab_/d' \
    "${CRONFILE}"
(4)、crontab设置
# crontab -e
## crontab日志切割与清理
00 00 * * * /data/scripts/cut_cron_log.sh >/dev/null 2>&1

## 运行状况检测标记
*/1 * * * * cd /usr/local; echo >/dev/null 2>&1

## crontab信息备份
30 08 * * * /data/scripts/backup_crontab.sh >/dev/null 2>&1

## crontab垃圾头信息清理
*/30 * * * * /data/scripts/clean_crontab_trash.sh >/dev/null 2>&1

Linux中利用crontab创建计划任务 http://www.linuxidc.com//Linux/2013-06/86401.htm
Linux中用crontab例行工作安排 http://www.linuxidc.com//Linux/2013-06/85441.htm
Linux crontab不执行问题排查 http://www.linuxidc.com//Linux/2013-06/85432.htm
Ubuntu使用crontab定时任务 http://www.linuxidc.com//Linux/2013-05/84770.htm
Linux计划任务(at batch crontab anacron) http://www.linuxidc.com//Linux/2013-03/81584.htm
Linux任务计划 (at,crontab) http://www.linuxidc.com/Linux/2015-09/122970.htm
本文永久更新链接地址