很久以前在网上看到一个用 Node.js 获取代理 IP 的例子,照着写了之后发现很不稳定,于是重写了一下。
const fs = require('fs');
const cheerio = require('cheerio');
const HttpsProxyAgent = require('https-proxy-agent');
const fetch = require('node-fetch');
// Listing page that proxy candidates are scraped from.
const url = 'http://www.xicidaili.com/wt/';
// Page requested through each proxy to check that the proxy works.
const validateUrl = encodeURI('http://2017.ip138.com/ic.asp');
// Proxies ("ip:port") that passed validation; appended to by getCanUse().
// Never reassigned, only pushed to, so it is declared with const.
const canUse = [];
getCanUse();
/**
 * Scrapes the proxy list, validates the candidates one at a time, and writes
 * the ones that passed to ./ipList.txt as a JSON array.
 *
 * Proxies that pass are also pushed onto the module-level `canUse` array.
 *
 * @returns {Promise<void>} Resolves when the run is over. Failures are logged
 *   instead of rejecting, so a caller that ignores the promise never triggers
 *   an unhandled rejection.
 */
async function getCanUse() {
  try {
    const ips = await getIps(url);
    console.log(ips);

    // getIps resolves to null when the listing page could not be fetched or
    // parsed; iterating over it would throw.
    if (!Array.isArray(ips)) {
      console.log('获取代理ip列表失败');
      return;
    }

    for (const ip of ips) {
      // runHelper caps how long a single validation may take.
      const res = await runHelper(validateProxy(ip));
      if (res) {
        console.log(ip + '可用');
        canUse.push(ip);
      } else {
        console.log('验证失败');
      }
    }

    // Adapt the callback API so the returned promise settles after the write.
    await new Promise((resolve) => {
      fs.writeFile('./ipList.txt', JSON.stringify(canUse), (err) => {
        console.log(err ? '写入失败' : '写入成功验证文件');
        resolve();
      });
    });
  } catch (err) {
    // Safety net: keep an unexpected failure from becoming an unhandled rejection.
    console.log(err);
  }
}
/**
 * Requests the validation page through the given proxy and reports whether
 * the response contains the expected marker text.
 *
 * @param {string} host - Proxy address in "ip:port" form (no scheme).
 * @param {number} [timeout=2000] - Request timeout in milliseconds, passed to
 *   node-fetch so a dead proxy does not leave the request pending.
 * @returns {Promise<boolean|null>} true when the marker is found, false when
 *   the page loaded without it, null when the request failed or timed out.
 */
async function validateProxy(host, timeout = 2000) {
  try {
    const proxy = 'http://' + host;
    // Route the request through the proxy and bound how long it may take.
    const options = { agent: new HttpsProxyAgent(proxy), timeout };
    const res = await fetch(validateUrl, options);
    const body = await res.text();
    return body.includes('您的IP是:[');
  } catch (err) {
    console.log(err);
    return null;
  }
}
/**
 * Downloads the proxy listing page and extracts "ip:port" entries from the
 * rows of the `#ip_list` table.
 *
 * @param {string} url - Address of the listing page.
 * @returns {Promise<string[]|null>} The extracted entries, or null when the
 *   page could not be fetched or parsed (the error is logged).
 */
async function getIps(url) {
  try {
    const res = await fetch(url);
    const html = await res.text();
    const $ = cheerio.load(html);
    const ips = [];

    // .each is the iteration helper meant for side effects; returning
    // undefined from the callback simply moves on to the next row.
    $('#ip_list tr').each((i, item) => {
      const td = $(item).find('td');
      // Trim so stray whitespace in a cell cannot end up inside "ip:port".
      const ip = td.eq(1).text().trim();
      if (!validateIp(ip)) return;
      const port = td.eq(2).text().trim();
      ips.push(`${ip}:${port}`);
    });

    return ips;
  } catch (err) {
    console.log(err);
    return null;
  }
}
/**
 * Tells whether the given text is a dotted-quad IPv4 address.
 *
 * The pattern is anchored at both ends and requires exactly four octets in
 * the range 0-255. Leading and trailing whitespace is ignored.
 *
 * @param {string} ip - Candidate address text.
 * @returns {boolean} true when `ip` is a well-formed IPv4 address.
 */
function validateIp(ip) {
  const candidate = String(ip).trim();
  return /^(?:(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])$/.test(candidate);
}
/**
 * Races a pending operation against a timeout.
 *
 * @param {Promise<*>} fn - The already-started operation (a promise, despite
 *   the parameter name).
 * @param {number} [time=2000] - Timeout in milliseconds.
 * @returns {Promise<*>} Settles like `fn` when it finishes first; resolves to
 *   null when the timeout elapses first.
 */
function runHelper(fn, time = 2000) {
  let timer;
  const limit = new Promise((resolve) => {
    timer = setTimeout(() => {
      resolve(null);
    }, time);
  });
  const raced = Promise.race([limit, fn]);
  // Cancel the timer once the race is decided so it does not linger for up
  // to `time` ms. Both handlers are given so this side chain never rejects.
  const stopTimer = () => clearTimeout(timer);
  raced.then(stopTimer, stopTimer);
  return raced;
}

本文介绍了作者在使用 Node.js 获取代理 IP 时遇到的问题,以及如何重新编写代码以提高稳定性的经验。通过实例展示了如何利用 node-fetch 库处理代理 IP 的获取过程。

3996

被折叠的 条评论
为什么被折叠?



