pond2539 发表于 2016-12-27 07:08:25

nginx中禁止屏蔽网络爬虫

# Virtual host that rejects known crawler User-Agents with 403 and
# reverse-proxies every other request to a backend on localhost:8080.
server {
    listen       80;
    server_name  www.xxx.com;

    #charset koi8-r;
    #access_log  logs/host.access.log  main;

    #location / {
    #    root   html;
    #    index  index.html index.htm;
    #}

    # Block crawlers: `~*` is a case-insensitive regex match against the
    # request's User-Agent header. The pattern is unanchored, so any
    # User-Agent that merely contains one of the alternatives is rejected.
    if ($http_user_agent ~* "qihoobot|Baiduspider|Googlebot|Googlebot-Mobile|Googlebot-Image|Mediapartners-Google|Adsbot-Google|Feedfetcher-Google|Yahoo! Slurp|Yahoo! Slurp China|YoudaoBot|Sosospider|Sogou spider|Sogou web spider|MSNBot|ia_archiver|Tomato Bot") {
        return 403;
    }

    # Catch-all regex location: forwards requests to the backend.
    # The exact-match `location = /50x.html` below takes precedence for
    # that one URI.
    location ~ ^/(.*)$ {
        proxy_pass        http://localhost:8080;
        proxy_redirect    off;

        # Pass the original host and client address through to the backend.
        proxy_set_header  Host             $host;
        proxy_set_header  X-Real-IP        $remote_addr;
        proxy_set_header  X-Forwarded-For  $proxy_add_x_forwarded_for;

        # Request body limits.
        client_max_body_size     10m;
        client_body_buffer_size  128k;

        # Backend timeouts.
        proxy_connect_timeout  90;
        proxy_send_timeout     90;
        proxy_read_timeout     90;

        # Response buffering.
        proxy_buffer_size           4k;
        proxy_buffers               4 32k;
        proxy_busy_buffers_size     64k;
        proxy_temp_file_write_size  64k;
    }

    #error_page  404              /404.html;

    # redirect server error pages to the static page /50x.html
    #
    # The status codes and the target URI must be separated by whitespace;
    # the last argument is always taken as the URI.
    error_page   500 502 503 504  /50x.html;
    location = /50x.html {
        root   html;
    }

    # proxy the PHP scripts to Apache listening on 127.0.0.1:80
    #
    #location ~ \.php$ {
    #    proxy_pass   http://127.0.0.1;
    #}

    # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
    #
    #location ~ \.php$ {
    #    root           html;
    #    fastcgi_pass   127.0.0.1:9000;
    #    fastcgi_index  index.php;
    #    fastcgi_param  SCRIPT_FILENAME  /scripts$fastcgi_script_name;
    #    include        fastcgi_params;
    #}

    # deny access to .htaccess files, if Apache's document root
    # concurs with nginx's one
    #
    #location ~ /\.ht {
    #    deny  all;
    #}
}
  可以用 curl 测试一下(使用被屏蔽的 User-Agent 发起请求时,应返回 403 Forbidden):



curl -I -A "qihoobot" www.xxx.com

页: [1]
查看完整版本: nginx中禁止屏蔽网络爬虫