刚开始代码是这样的:
var http=require('http');
/**
 * Fetches `url` with `http.get` and hands the complete response body to
 * `callback`.
 *
 * Chunks are kept as Buffers and decoded once when the response ends, so a
 * multi-byte UTF-8 character that happens to be split across two chunks is
 * decoded correctly instead of turning into replacement characters.
 *
 * The status code is not inspected and nothing is followed: whatever body the
 * server sends (including a short redirect page) is passed on unchanged.
 *
 * @param {string} url - Address to request.
 * @param {function(?string): void} callback - Called at most once with the
 *   body decoded as UTF-8, or with `null` when the request or the response
 *   stream reports an error.
 */
function download(url, callback) {
  var finished = false;

  // Both the request and the response can report an error; make sure the
  // caller only ever hears back once.
  function finish(result) {
    if (finished) {
      return;
    }
    finished = true;
    callback(result);
  }

  function fail(e) {
    console.error('download failed: ' + url + ': ' + (e && e.message ? e.message : e));
    finish(null);
  }

  http.get(url, function (res) {
    var chunks = [];
    res.on('data', function (chunk) {
      chunks.push(chunk);
    });
    res.on('end', function () {
      // Decode the whole body at once; decoding per chunk would corrupt
      // characters whose bytes straddle a chunk boundary.
      finish(Buffer.concat(chunks).toString());
    });
    res.on('error', fail);
  }).on('error', fail);
}
// Expose download on this module's CommonJS exports object.
exports.download = download;
请求到空数据时,日志开头打印如下:
first <Buffer 3c 73 63
> 72 69 70 74 3e 64 6f 63 75 6d 65 6e 74 2e 6c 6f 63 61 74 69 6f 6e 20
> 3d 20 22 68 74 74 70 3a 2f 2f 61 71 69 63 6e 2e 6f 72 67 2f 63 69 74
> 79 2f …> 1 <script>document.location =
> "http://aqicn.org/city/beijing/";</script>two <Buffer 3c 73 63 72 69
> 70 74 3e 64 6f 63 75 6d 65 6e 74 2e 6c 6f 63 61 74 69 6f 6e 20 3d 20
> 22 68 74 74 70 3a 2f 2f 61 71 69 63 6e 2e 6f 72 67 2f 63 69 74 79 2f
> ...> 2 而正常情况是: first <Buffer 3c 73 63 72 69 70 74 3e 64 6f 63 75 6d 65
> 6e 74 2e 6c 6f 63 61 74 69 6f 6e 20 3d 20 22 68 74 74 70 3a 2f 2f 61
> 71 69 63 6e 2e 6f 72 67 2f 63 69 74 79 2f ...> 1
> ----html文档,省略很多字-------- <Buffer------------------------> ----html文档,省略很多字------ <Buffer------------------------> 1 ----html文档,省略很多字-------- <Buffer------------------------> ----html文档,省略很多字------ <Buffer------------------------>
后来换了一种方式:
var request = require('request');

/**
 * Requests `url` through the `request` library (with its `timeout` option set
 * to 3000) and prints the response body.
 *
 * When the request fails or the status code is not 200, prints 'not success'
 * followed by the reason, so a timeout or transport error can be told apart
 * from an unexpected HTTP status.
 *
 * @param {string} url - Address to request.
 */
function download(url) {
  request.get({ url: url, timeout: 3000 }, function (error, response, body) {
    if (error) {
      console.log('not success', error.message || error);
      return;
    }
    if (response.statusCode !== 200) {
      console.log('not success', 'HTTP ' + response.statusCode);
      return;
    }
    console.log(body);
  });
}
但有时候请求到的数据依然为空。
求大神指教!
这要看你爬取的对象。有的网站会做反爬处理,比如校验 cookies,或者需要执行 JS 之后才能拿到数据。从你贴的日志看,“空数据”那次返回的其实是一段 <script>document.location = "…";</script> 跳转脚本,并不是真正的空响应。所以,先用 Firebug 仔细地看一下浏览器实际发出的请求。