JavaScript

超轻量级php框架startmvc

node.js 抓取代理ip实例代码

更新时间:2020-05-09 21:54 作者:startmvc
node.js 实现抓取代理 ip。主要文件:index.js /* 支持:node.js v7.9.0 */ const cheerio = require('cheerio'); const fetch = …

node.js实现抓取代理ip

主要文件:index.js


/*
* 支持:node.js v7.9.0
*/
const cheerio=require('cheerio');
const fetch =require('node-fetch');
const Promise=require('bluebird');
let mongoose=require('mongoose');

// Run bluebird's promisifyAll over the mongoose module, then open the
// connection to the local `ipproxypool` database.
Promise.promisifyAll(mongoose);
mongoose.connect('mongodb://localhost:27017/ipproxypool');

// One document per proxy; `ip` is a string field declared with `unique: true`.
const { Schema } = mongoose;
const IPpool = new Schema({
  ip: { type: String, unique: true },
});

// Model registered under the name 'IP'; the crawler saves through it.
const Ipproxy = mongoose.model('IP', IPpool);

/**
 * Scrape one proxy-list page and persist every `ip:port` pair it contains.
 *
 * The page is requested with GET, parsed with cheerio, and each row of
 * `#list table tbody` contributes one document (first cell = address,
 * second cell = port) that is saved through the `Ipproxy` model.
 *
 * @param {string} url - Address of the listing page to scrape.
 * @returns {Promise<void>} Settles once every save has settled. Network,
 *   parse and storage failures are logged instead of rethrown, so the
 *   returned promise does not reject and callers may safely ignore it.
 */
function fetchUrl(url){
  return fetch(url,{
    method:'get',
    headers:{
    }
  })
    .then(res=>res.text())
    .then(body=>{
      const $=cheerio.load(body);
      const pending=[];
      // Select the rows once instead of re-running the selector per cell.
      $('#list table tbody').find('tr').each((index,row)=>{
        const cells=$(row).find('td');
        const ipaddress=cells.eq(0).text().trim();
        const port=cells.eq(1).text().trim();
        // A row without both cells would otherwise be stored as ":".
        if(!ipaddress||!port){
          return;
        }
        console.log(`IP:${ipaddress}:${port}`);
        const ip=`${ipaddress}:${port}`;
        const ippool=new Ipproxy({
          ip:ip
        });
        // Promise.resolve() normalises whatever thenable save() returns so
        // a rejection handler can always be attached.
        pending.push(Promise.resolve(ippool.save()).catch(err=>{
          // 11000 is MongoDB's duplicate-key code: with the unique `ip`
          // field this simply means the proxy is already stored.
          if(err&&err.code===11000){
            return;
          }
          console.error(`failed to save ${ip}:`,err);
        }));
      });
      return Promise.all(pending);
    })
    .then(()=>undefined)
    .catch(err=>{
      console.error(`failed to scrape ${url}:`,err);
    });
}

/**
 * Pause helper for async code.
 *
 * @param {number} time - Delay in milliseconds handed to setTimeout.
 * @returns {Promise<string>} Promise fulfilled with 'ok' once the timer fires.
 */
const sleep = (time) => new Promise((resolve) => {
  setTimeout(() => resolve('ok'), time);
});
// Number of listing pages to crawl; pages are numbered starting at 1.
const pageNumber=10;

/**
 * Crawl listing pages 1..pageNumber, launching one page request per
 * iteration and pausing 1500 ms between launches.
 *
 * @returns {Promise<void>} Resolves after the pause that follows the last page.
 */
const start = async function(){
  // Inclusive bound: with `<` the last of the `pageNumber` pages was skipped.
  for(let j=1;j<=pageNumber;j++){
    console.log(`当前是第${j}次等待..`);
    // Not awaited on purpose: the request overlaps the pause below.
    fetchUrl(`http://www.kuaidaili.com/free/inha/${j}/`);
    await sleep(1500);
  }
};

// Terminate the chain so a failure is reported instead of left unhandled.
start().catch(err=>{
  console.error('crawl aborted:',err);
});

包支持 : package.json


{
 "name": "demo-4-ipproxypool",
 "version": "1.0.0",
 "description": "",
 "main": "index.js",
 "scripts": {
 "test": "echo \"Error: no test specified\" && exit 1"
 },
 "author": "false-l",
 "license": "",
 "devDependencies": {
 "babel-preset-es2015": "^6.24.1",
 "babel-preset-react": "^6.24.1",
 "babel-preset-stage-3": "^6.24.1"
 },
 "dependencies": {
 "babel-core": "^6.24.1",
 "bluebird": "^3.5.0",
 "cheerio": "^0.22.0",
 "koa": "^2.2.0",
 "koa-router": "^7.1.1",
 "mongoose": "^4.9.6",
 "node-fetch": "^1.6.3"
 }
}

本地需要安装mongodb数据库,用于存储抓取到的ip,目前还未实现ip验证。写这个主要是出于好奇。

上面的代码就可以实现抓取ip代理网站的ip并存到mongodb数据库中。

下面再放出一个基于koa2的api接口的简易服务器实现

主要文件:server.js


// Promise library and ODM used for persistence.
const Promise = require('bluebird');
const mongoose = require('mongoose');

// HTTP layer: a koa-router instance and the Koa application.
const koa = require('koa');
const router = require('koa-router')();
const app = new koa();

// Run bluebird's promisifyAll over the mongoose module, then open the
// connection to the local `ipproxypool` database.
Promise.promisifyAll(mongoose);
mongoose.connect('mongodb://localhost:27017/ipproxypool');

// One document per proxy; `ip` is a string field declared with `unique: true`.
const { Schema } = mongoose;
const IPpool = new Schema({
  ip: { type: String, unique: true },
});

// Model registered under the name 'IP'; the request handler reads through it.
const Ipproxy = mongoose.model('IP', IPpool);

/**
 * Catch-all middleware: after downstream middleware has run, answer the
 * request with a JSON array holding the `ip` value of every stored document.
 *
 * A failed query rejects this middleware, which leaves the error response
 * to Koa's own error handling.
 */
app.use(async (ctx, next) => {
  await next();
  // No callback is passed: awaiting the promise from exec() is the only
  // trigger for the query, and a failure surfaces here as a rejection.
  const data = await Ipproxy.find({}).exec();
  const map = data.map(ip => ip.ip);
  // 'application/json' is the registered media type for JSON payloads.
  ctx.response.type = 'application/json';
  ctx.response.body = map;
});
app.listen(3000);
console.log('server listen:3000')

至于为什么既有promise又有async,是因为对异步语法还不是很熟,怎么会就怎么写了。

使用方式:

 根据package.json

npm install   // 安装支持

node index.js  //获取代理 ip

node server.js  //运行简易ip接口

感谢阅读,希望能帮助到大家,谢谢大家对本站的支持!