如果你的网站有一定的流量、运营时间也比较久,而服务器的负载却远远超出流量应有的水平,就要警惕了。现在 AI 爬虫的暴力爬取非常严重,对页面数量多的网站影响尤其大:它们会不间断、高频率地爬取网站上的所有内容,导致缓存被击穿,进而使服务器全面过载。不论是什么网站、什么内容,AI 爬虫都会爬取,没有例外。
以下 shell 脚本可以分析网站日志,判断是否存在 AI 爬虫的高强度爬取:
#!/bin/bash bash 检测访问频率.sh
#!/bin/bash
#
# AI crawler traffic report for a web-server access log.
#
# Looks at the last SAMPLE_LINES lines of LOG_FILE and prints the covered time
# span, HTTP status counts, AI-crawler hit counts and the busiest client IPs.
#
# Assumes the common/combined access-log layout, i.e. field 1 is the client
# IP, field 4 is "[dd/Mon/yyyy:HH:MM:SS" and the status code is the first
# token after the quoted request line.
#
# Strict mode (set -e) is intentionally not enabled: "grep -c" exits non-zero
# whenever the count is 0, which is a normal result here.

# Path of the access log; may also be supplied through the environment.
LOG_FILE="${LOG_FILE:-替换成网站日志地址}"
# How many trailing log lines to analyse.
SAMPLE_LINES="${SAMPLE_LINES:-2000}"
# User-agent fragments (matched case-insensitively) treated as AI crawlers.
# Shared by the total count and the per-IP list so the two always agree.
AI_BOT_PATTERN="claudebot|gptbot|chatgpt|amazonbot|anthropic|perplexity"

#######################################
# Print the timestamp (field 4 without the leading "[") of one log line.
# Arguments: $1 - a single access-log line
# Outputs:   the timestamp, e.g. 10/Oct/2023:13:55:36
#######################################
_timestamp_of() {
  awk '{ gsub(/\[/, "", $4); print $4; exit }' <<<"$1"
}

#######################################
# Convert "dd/Mon/yyyy:HH:MM:SS" into a count of seconds that is comparable
# between timestamps (seconds since 1970-01-01 in the log's own time zone).
# The time-zone offset field is ignored, so only differences are meaningful.
# Arguments: $1 - timestamp as printed by _timestamp_of
# Outputs:   the number of seconds
# Returns:   1 if the timestamp does not have the expected shape
#######################################
_log_time_to_seconds() {
  local stamp=$1
  local re='^([0-9]{1,2})/([A-Za-z]{3})/([0-9]{4}):([0-9]{2}):([0-9]{2}):([0-9]{2})$'
  [[ $stamp =~ $re ]] || return 1

  # The 10# prefix forces base 10; without it "08" and "09" are rejected as
  # invalid octal numbers by shell arithmetic.
  local day=$(( 10#${BASH_REMATCH[1]} ))
  local year=$(( 10#${BASH_REMATCH[3]} ))
  local hour=$(( 10#${BASH_REMATCH[4]} ))
  local minute=$(( 10#${BASH_REMATCH[5]} ))
  local second=$(( 10#${BASH_REMATCH[6]} ))

  local month
  case ${BASH_REMATCH[2]} in
    Jan) month=1 ;;
    Feb) month=2 ;;
    Mar) month=3 ;;
    Apr) month=4 ;;
    May) month=5 ;;
    Jun) month=6 ;;
    Jul) month=7 ;;
    Aug) month=8 ;;
    Sep) month=9 ;;
    Oct) month=10 ;;
    Nov) month=11 ;;
    Dec) month=12 ;;
    *) return 1 ;;
  esac

  # Civil date -> day number. The year is shifted to start in March so the
  # leap day falls at the end of the (shifted) year.
  if (( month <= 2 )); then
    year=$(( year - 1 ))
  fi
  local era=$(( year / 400 ))
  local year_of_era=$(( year - era * 400 ))
  local shifted_month=$(( (month + 9) % 12 ))
  local day_of_year=$(( (153 * shifted_month + 2) / 5 + day - 1 ))
  local day_of_era=$(( year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year ))
  local days=$(( era * 146097 + day_of_era - 719468 ))

  printf '%d\n' "$(( days * 86400 + hour * 3600 + minute * 60 + second ))"
}

#######################################
# Print numerator * factor / denominator with one decimal place.
# Arguments: $1 - numerator, $2 - factor, $3 - denominator (all integers)
# Outputs:   the value, or "N/A" when the denominator is not positive
#######################################
_ratio() {
  local numerator=$1 factor=$2 denominator=$3
  if (( denominator <= 0 )); then
    printf 'N/A\n'
    return 0
  fi
  awk -v n="$numerator" -v f="$factor" -v d="$denominator" \
    'BEGIN { printf "%.1f\n", n * f / d }'
}

#######################################
# Count log lines on stdin whose HTTP status equals the given code.
# The status is taken from the first token after the quoted request line, so
# a response size or URL that happens to contain the number is not counted.
# Arguments: $1 - status code, e.g. 200
# Outputs:   the number of matching lines
#######################################
_count_status() {
  awk -F'"' -v code="$1" '
    { split($3, rest, " "); if (rest[1] == code) hits++ }
    END { print hits + 0 }'
}

#######################################
# Read log lines on stdin and print the ten most frequent client IPs together
# with their request rate.
# Arguments: $1 - length of the sampled window in seconds (0 = unknown)
#######################################
_print_top_ips() {
  local seconds=$1
  awk '{ print $1 }' | sort | uniq -c | sort -nr | head -n 10 |
    awk -v secs="$seconds" '{
      if (secs > 0)
        printf "%-15s: %4d 次 (%.1f 次/分钟)\n", $2, $1, $1 * 60 / secs
      else
        printf "%-15s: %4d 次 (N/A 次/分钟)\n", $2, $1
    }'
}

#######################################
# Produce the whole report.
# Globals:   reads LOG_FILE, SAMPLE_LINES, AI_BOT_PATTERN;
#            writes FIRST_TIME, LAST_TIME, TIME_DIFF, TOTAL, SUCCESS, BLOCKED,
#            RATE_LIMITED, CLAUDE, GPT, AMAZON, AI_TOTAL (left global on
#            purpose so later parts of the script can keep using them)
# Outputs:   the report on stdout, errors on stderr
# Returns:   1 if the log cannot be read or is empty
#######################################
ai_crawler_report() {
  echo "============================================"
  echo "🤖 AI 爬虫访问情况统计"
  echo "============================================"

  if [[ ! -r "$LOG_FILE" ]]; then
    printf '错误: 无法读取日志文件: %s\n' "$LOG_FILE" >&2
    return 1
  fi

  # Take one snapshot of the log tail. Re-reading the file for every figure
  # would mix different windows when the log is being written to.
  local sample
  sample=$(tail -n "$SAMPLE_LINES" -- "$LOG_FILE") || return 1
  if [[ -z "$sample" ]]; then
    printf '错误: 日志文件为空: %s\n' "$LOG_FILE" >&2
    return 1
  fi

  # Time range covered by the sample.
  echo ""
  echo "⏰ 统计时间范围:"
  FIRST_TIME=$(_timestamp_of "${sample%%$'\n'*}")
  LAST_TIME=$(_timestamp_of "${sample##*$'\n'}")
  echo " 开始: $FIRST_TIME"
  echo " 结束: $LAST_TIME"

  # Window length in seconds; 0 means "unknown" (unparsable or out-of-order
  # timestamps) and makes every rate print as N/A instead of dividing by zero.
  local start_seconds end_seconds
  if start_seconds=$(_log_time_to_seconds "$FIRST_TIME") &&
    end_seconds=$(_log_time_to_seconds "$LAST_TIME") &&
    (( end_seconds >= start_seconds )); then
    TIME_DIFF=$(( end_seconds - start_seconds ))
  else
    TIME_DIFF=0
  fi
  echo " 时长: 约 $TIME_DIFF 秒 ($(_ratio "$TIME_DIFF" 1 60) 分钟)"

  echo ""
  echo "📊 最近 $SAMPLE_LINES 条记录统计:"
  TOTAL=$(grep -c '' <<<"$sample")
  SUCCESS=$(_count_status 200 <<<"$sample")
  BLOCKED=$(_count_status 403 <<<"$sample")
  RATE_LIMITED=$(_count_status 503 <<<"$sample")
  echo " 总请求数: $TOTAL (平均 $(_ratio "$TOTAL" 60 "$TIME_DIFF") 次/分钟)"
  echo " 成功访问 (200): $SUCCESS"
  echo " 被封禁 (403): $BLOCKED ← 被 Nginx 拦截"
  echo " 被限流 (503): $RATE_LIMITED ← 被 PHP 限流"

  echo ""
  echo "🤖 AI 爬虫统计:"
  CLAUDE=$(grep -cF -- "ClaudeBot" <<<"$sample")
  GPT=$(grep -cF -- "GPTBot" <<<"$sample")
  AMAZON=$(grep -cF -- "Amazonbot" <<<"$sample")
  AI_TOTAL=$(grep -ciE -- "$AI_BOT_PATTERN" <<<"$sample")
  echo " ClaudeBot: $CLAUDE 次 ($(_ratio "$CLAUDE" 60 "$TIME_DIFF") 次/分钟)"
  echo " GPTBot: $GPT 次"
  echo " Amazonbot: $AMAZON 次"
  echo " AI总计: $AI_TOTAL 次 (占比 $(_ratio "$AI_TOTAL" 100 "$TOTAL")%)"

  echo ""
  echo "🔥 访问最频繁的 IP TOP 10:"
  _print_top_ips "$TIME_DIFF" <<<"$sample"

  echo ""
  echo "🤖 AI 爬虫 IP TOP 10:"
  grep -iE -- "$AI_BOT_PATTERN" <<<"$sample" | _print_top_ips "$TIME_DIFF"
}

# Returns non-zero instead of exiting when the log is unusable, so the failure
# is reported once and the rest of the script is not force-terminated here.
ai_crawler_report
echo ""
echo "🚫 被限流的 IP:"
BLOCKED_IPS=$(tail -2000
暂无评论...