系统环境:
SUSE Linux Enterprise Server 10 SP1 (x86_64)
问题背景:
由于线上系统环境下的crontab内容比较多,在进行日常crontab任务调度时,经常会异常挂掉而影响业务的正常使用,因此结合C和Shell写了一个简单的对crontab进行健康检测的功能。
处理思路:
修改syslog的配置参数,把crontab调度日志单独抽取出来,同时在crontab项里添加检测标记,通过后台守护进程定期检测状态标记来判断当前crontab调度是否正常,同时为了避免日志文件过大而影响性能,会定期对日志文件做切割和清理处理。
#——————————————————————————————————————————————–
1、相关目录创建
# mkdir -p /data/logs/crontab
# mkdir -p /data/scripts
# mkdir -p /data/backup/crontab
#——————————————————————————————————————————————–
2、crontab健康检测C代码
# cd /data/scripts
# vim check_cron_process.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
|
/*
 * check_cron_process.h - constants and prototypes for the cron watchdog daemon.
 *
 * The header is self-contained: it pulls in <sys/stat.h> itself because
 * LOCKMODE is built from the S_I* permission macros.
 */
#ifndef CHECK_CRON_PROCESS_H
#define CHECK_CRON_PROCESS_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
/* Size of the scratch buffer that receives shell command output. */
#define BUFFSIZE1 1024
/* Size of the buffer that receives the formatted timestamp. */
#define BUFFSIZE2 32
/* PID/lock file used to keep a single daemon instance running. */
#define LOCKFILE "/var/run/check_cron_process.pid"
/* Log file of the daemon itself. */
#define LOGFILE "/var/log/check_cron_process.log"
/* Lock file permissions: rw-r--r-- (0644). */
#define LOCKMODE (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)
/* Produces output when a process matching the word "cron" exists. */
#define SYSCMD1 "ps aux | grep -w cron | grep -v grep"
/* Produces output when such a process is listed as defunct. */
#define SYSCMD2 "ps aux | grep -w cron | grep -v grep | grep defunct"
/* Produces output when one of the last 6 cron log lines is a root "cd ..." job. */
#define SYSCMD3 "tail -6 /data/logs/crontab/cron.log | grep '(root) CMD (cd'"
/* Force-kills every cron process. */
#define SYSCMD4 "killall -9 cron >/dev/null 2>&1"
/* Starts the cron service. */
#define SYSCMD5 "/sbin/service cron start >/dev/null 2>&1"
/* Exit unless this process can take the exclusive lock on LOCKFILE. */
void already_running( void );
/* Detach from the terminal and continue running in the background. */
void init_daemon( void );
/* Run a shell command; 1 if it printed something, 0 if not, -1 on NULL. */
int run_system_cmd( const char *syscmd);
#endif /* CHECK_CRON_PROCESS_H */
|
# vim check_cron_process.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
|
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <syslog.h>
#include <fcntl.h>
#include <errno.h>
#include <time.h>
#include "check_cron_process.h"
/* Scratch buffer shared by run_system_cmd() for the command text and its output. */
static char buffer[BUFFSIZE1] = {0};
/* Timestamp buffer filled by get_curr_date() on behalf of writelog(). */
static char datetime[BUFFSIZE2] = {0};
/*
 * Format the current local time as "YYYY-MM-DD-HH:MM:SS" into strtime.
 *
 * strtime: destination buffer; must not be NULL.
 * ustrlen: size of strtime in bytes; 20 bytes are needed for the full
 *          timestamp plus its terminating NUL.
 *
 * Returns 0 on success. Returns -1 if strtime is NULL, ustrlen is 0,
 * localtime() fails, or the buffer is too small; in the last two cases
 * strtime is left as an empty string. Never writes past ustrlen bytes.
 */
int get_curr_date( char *strtime, unsigned int ustrlen)
{
    struct tm *pt = NULL;
    time_t timer;
    int written;

    if (!strtime || ustrlen == 0) {
        return -1;
    }
    /* Zero the whole buffer so callers always see a terminated string. */
    memset(strtime, 0, ustrlen);
    timer = time(NULL);
    pt = localtime(&timer);
    if (!pt) {
        return -1;
    }
    /* Bounded formatting: the previous sprintf() ignored ustrlen entirely. */
    written = snprintf(strtime, ustrlen, "%04d-%02d-%02d-%02d:%02d:%02d",
                       pt->tm_year + 1900, pt->tm_mon + 1, pt->tm_mday,
                       pt->tm_hour, pt->tm_min, pt->tm_sec);
    if (written < 0 || (unsigned int)written >= ustrlen) {
        /* Do not hand back a truncated timestamp. */
        strtime[0] = '\0';
        return -1;
    }
    return 0;
}
/*
 * Append one timestamped entry to LOGFILE.
 *
 * pLoginfo: message text, written verbatim after the timestamp and a single
 *           space (the caller supplies any trailing newline).
 *
 * Returns 0 once the entry has been handed to the stream, or -1 when the
 * message is NULL, longer than 256 bytes, or LOGFILE cannot be opened.
 */
int writelog( const char *pLoginfo)
{
    unsigned int msg_len;
    FILE *log_stream;

    if (pLoginfo == NULL) {
        return -1;
    }

    msg_len = strlen(pLoginfo);
    if (msg_len > 256) {
        return -1;
    }

    log_stream = fopen(LOGFILE, "a+");
    if (log_stream == NULL) {
        return -1;
    }

    /* Refresh the shared timestamp buffer immediately before writing. */
    memset(datetime, 0, BUFFSIZE2);
    get_curr_date(datetime, BUFFSIZE2);

    fprintf(log_stream, "%s %s", datetime, pLoginfo);
    fclose(log_stream);
    return 0;
}
/*
 * Request a non-blocking exclusive (write) lock covering the whole file.
 *
 * fd: open file descriptor to lock.
 *
 * Returns the result of fcntl(F_SETLK): -1 with errno set on failure.
 */
int LockFile( int fd)
{
    /* l_start = 0 from SEEK_SET with l_len = 0 means "the entire file". */
    struct flock whole_file = {
        .l_type = F_WRLCK,
        .l_whence = SEEK_SET,
        .l_start = 0,
        .l_len = 0,
    };

    return fcntl(fd, F_SETLK, &whole_file);
}
/*
 * Ensure only one instance of the daemon runs.
 *
 * Opens (creating if needed) LOCKFILE, takes an exclusive lock on it and
 * writes this process's PID into it. If the lock is already held the
 * process exits with status 1 without logging; any other open/lock failure
 * is reported through syslog before exiting with status 1.
 *
 * The descriptor is deliberately kept open for the lifetime of the process:
 * an fcntl() record lock is released as soon as the owning process closes
 * any descriptor for the file, so closing it here would silently drop the
 * single-instance guarantee.
 */
void already_running( void )
{
    static int lock_fd = -1; /* held open until the process exits */
    char buf[16] = {0};
    int len;

    lock_fd = open(LOCKFILE, O_RDWR | O_CREAT, LOCKMODE);
    if (lock_fd < 0) {
        syslog(LOG_ERR, "can't open %s: %s", LOCKFILE, strerror(errno));
        exit(1);
    }
    if (LockFile(lock_fd) < 0) {
        if (errno == EACCES || errno == EAGAIN) {
            /* Lock held elsewhere: another instance is already running. */
            close(lock_fd);
            exit(1);
        }
        syslog(LOG_ERR, "can't lock %s: %s", LOCKFILE, strerror(errno));
        exit(1);
    }

    /*
     * Recording the PID is informational only; the lock itself is what
     * enforces exclusivity, so failures here are logged but not fatal.
     */
    if (ftruncate(lock_fd, 0) < 0) {
        syslog(LOG_WARNING, "can't truncate %s: %s", LOCKFILE, strerror(errno));
    }
    len = snprintf(buf, sizeof buf, "%ld", (long)getpid());
    if (len < 0 || (size_t)len >= sizeof buf
        || write(lock_fd, buf, (size_t)len) != (ssize_t)len) {
        syslog(LOG_WARNING, "can't write pid to %s", LOCKFILE);
    }
}
/*
 * Turn the calling process into a background daemon.
 *
 * Forks twice with a setsid() in between, then changes the working
 * directory to /tmp and clears the umask. Each parent exits with status 0;
 * only the final child returns. If fork() or setsid() fails the process
 * exits with status 1.
 *
 * The failure check comes before the parent check on purpose: fork()
 * returns -1 on error, which is non-zero, so testing "pid != 0" first would
 * report a failed fork as a successful exit.
 */
void init_daemon( void )
{
    pid_t pid;

    pid = fork();
    if (pid < 0) {
        exit(1);
    }
    if (pid > 0) {
        exit(0);
    }

    /* Start a new session so the daemon has no controlling terminal. */
    if (setsid() < 0) {
        exit(1);
    }

    /* Second fork: the survivor is not a session leader. */
    pid = fork();
    if (pid < 0) {
        exit(1);
    }
    if (pid > 0) {
        exit(0);
    }

    if (chdir("/tmp") < 0) {
        /* Non-fatal: the daemon keeps its inherited working directory. */
    }
    umask(0);
    return;
}
/*
 * Run a shell command and report whether it printed anything to stdout.
 *
 * syscmd: command line handed to the shell through popen().
 *
 * Returns 1 if a non-empty first line of output was read, 0 if the command
 * printed nothing or could not be started, and -1 if syscmd is NULL.
 * The first line of output (if any) is left in the shared `buffer`.
 */
int run_system_cmd( const char *syscmd)
{
    FILE *fp = NULL;
    int has_output = 0;

    if (syscmd == NULL) {
        return -1;
    }
    /*
     * Pass the command to popen() as-is. It was previously copied through
     * snprintf() as the *format string*, which misinterprets any '%' in the
     * command and truncates commands longer than the buffer.
     */
    fp = popen(syscmd, "r");
    if (!fp) {
        return 0;
    }
    memset(buffer, 0, BUFFSIZE1);
    if (fgets(buffer, BUFFSIZE1, fp) && buffer[0] != '\0') {
        has_output = 1;
    }
    pclose(fp);
    return has_output;
}
/* Force-kill cron and start it again, pausing one second before each step. */
static void restart_cron( void )
{
    sleep(1);
    system(SYSCMD4);
    sleep(1);
    system(SYSCMD5);
}

/*
 * Daemon entry point: daemonize, take the single-instance lock, then check
 * cron every 300 seconds.
 *
 * The checks run in order and the first one that trips decides the action:
 *   1. SYSCMD1 prints nothing  -> log it and start cron;
 *   2. SYSCMD2 prints a line   -> log it and kill + start cron;
 *   3. SYSCMD3 prints nothing  -> log it and kill + start cron;
 *   otherwise                  -> log that cron is ok.
 */
int main( int argc, char *argv[])
{
    init_daemon();
    already_running();
    openlog(NULL, LOG_CONS | LOG_PID, LOG_LOCAL1);

    for (;;) {
        if (!run_system_cmd(SYSCMD1)) {
            writelog("The cron process is not running, now start it! \n");
            sleep(1);
            system(SYSCMD5);
        } else if (run_system_cmd(SYSCMD2)) {
            writelog("The cron process is defunct, now restart it! \n");
            restart_cron();
        } else if (!run_system_cmd(SYSCMD3)) {
            writelog("The cron work is down, now restart it! \n");
            restart_cron();
        } else {
            writelog("The cron process is ok! \n");
        }
        sleep(300);
    }

    /* Not reached: the loop above never terminates. */
    closelog();
    return 0;
}
|
# vim Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
# Builds the statically linked check_cron_process daemon.
CC = gcc
CXX = g++
BINARY = check_cron_process
OBJS = check_cron_process.o
# -O2 belongs with the compile flags; it has no effect on compilation when
# it is only passed at link time.
CFLAGS += -I/usr/include -I/usr/local/include -Wall -Wno-unused-variable -O2
LDFLAGS = -static

.PHONY: all clean

all: $(BINARY)

$(BINARY): $(OBJS)
	$(CC) $(LDFLAGS) -o $(BINARY) $(OBJS)

# Rebuild the object when the header changes; $< is the .c file only.
$(OBJS): %.o: %.c check_cron_process.h
	$(CC) $(CFLAGS) -c $< -o $@

clean:
	rm -f $(BINARY) $(OBJS)
|
# make
# /data/scripts/check_cron_process
#——————————————————————————————————————————————–
3、crontab任务调度日志配置【新增】
# vim /etc/syslog.conf
1
2
3
|
## check_crontab_start.tag.1
# Write cron-facility messages of every priority to a dedicated log file.
cron.* /data/logs/crontab/cron.log
## check_crontab_end.tag.1
|
# vim /etc/syslog-ng/syslog-ng.conf
1
2
3
4
5
6
7
8
9
10
11
|
## check_crontab_start.tag.1
# Destination: the dedicated cron log file.
destination dst_cron {
file("/data/logs/crontab/cron.log");
};
# Route messages from source "src" that pass filter "f_cron" to dst_cron.
# NOTE(review): "src" and "f_cron" are not defined in this snippet; presumably
# they come from the rest of syslog-ng.conf - confirm before deploying.
log {
source(src);
filter(f_cron);
destination(dst_cron);
};
## check_crontab_end.tag.1
|
# /sbin/service syslog restart
#——————————————————————————————————————————————–
4、crontab任务调度日志处理
(1)、日志切割与清理
# vim /data/scripts/cut_cron_log.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
#!/bin/bash
# Rotate the cron log: rename the current log with yesterday's date, restart
# syslog so a fresh cron.log is opened, and delete the log from 10 days ago.
PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin:/usr/local/sbin
LOGPATH="/data/logs/crontab"

# Do nothing if cron itself is not running.
retval=$(ps aux | grep sbin/cron | grep -v grep | wc -l)
if [ "${retval}" -eq 0 ]; then
    echo "The cron process is not running ^_^"
    exit 1
fi

mv "${LOGPATH}/cron.log" "${LOGPATH}/cron_$(date -d "yesterday" +"%Y-%m-%d").log"
/sbin/service syslog restart
rm -f "${LOGPATH}/cron_$(date -d "10 days ago" +"%Y-%m-%d").log"
|
(2)、crontab信息备份
# vim /data/scripts/backup_crontab.sh
1
2
3
4
5
6
7
8
9
10
11
12
|
#!/bin/bash
# Save root's crontab to a dated file and delete the backup from 10 days ago.
PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin
CRONTAB_BACKUP_DIR="/data/backup/crontab"

mkdir -p "${CRONTAB_BACKUP_DIR}"
crontab -uroot -l > "${CRONTAB_BACKUP_DIR}/crontab_$(date +%F)"

CRONBAK=crontab_$(date -d "10 days ago" +"%Y-%m-%d")
find "${CRONTAB_BACKUP_DIR}" -type f -name "${CRONBAK}" -exec rm -f {} \;
|
(3)、crontab垃圾头信息清理
# vim /data/scripts/clean_crontab_trash.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
|
#!/bin/bash
# Strip auto-generated header comment lines from root's crontab file.
PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin:/usr/local/sbin
CRONFILE="/var/spool/cron/tabs/root"

# One in-place pass; a line is removed if it matches any of the patterns.
sed -i \
    -e '/# DO NOT EDIT THIS FILE/d' \
    -e '/# (\/data\/crontab.tmp/d' \
    -e '/# (\/tmp\/crontab/d' \
    -e '/# (Cron version/d' \
    -e '/# (- installed on/d' \
    -e '/# (\/usr\/local\/agenttools/d' \
    -e '/# (\/tmp\/cron.tmp/d' \
    -e '/# (tmp2 installed/d' \
    -e '/# (crontab.tmp/d' \
    -e '/# (\/data\/crontab_/d' \
    "${CRONFILE}"
|
(4)、crontab设置
# crontab -e
## Rotate and prune the cron log (daily at 00:00)
00 00 * * * /data/scripts/cut_cron_log.sh >/dev/null 2>&1
## Health-check marker: runs every minute so the cron log keeps receiving
## "(root) CMD (cd ..." lines, which the watchdog's SYSCMD3 greps for
*/1 * * * * cd /usr/local; echo >/dev/null 2>&1
## Back up the crontab (daily at 08:30)
30 08 * * * /data/scripts/backup_crontab.sh >/dev/null 2>&1
## Remove junk header lines from the crontab (every 30 minutes)
*/30 * * * * /data/scripts/clean_crontab_trash.sh >/dev/null 2>&1
Linux中利用crontab创建计划任务 http://www.linuxidc.com//Linux/2013-06/86401.htm
Linux中用crontab例行工作安排 http://www.linuxidc.com//Linux/2013-06/85441.htm
Linux crontab不执行问题排查 http://www.linuxidc.com//Linux/2013-06/85432.htm
Ubuntu使用crontab定时任务 http://www.linuxidc.com//Linux/2013-05/84770.htm
Linux计划任务(at batch crontab anacron) http://www.linuxidc.com//Linux/2013-03/81584.htm
Linux任务计划 (at,crontab) http://www.linuxidc.com/Linux/2015-09/122970.htm
本文永久更新链接地址:http://www.linuxidc.com/Linux/2015-09/123064.htm