From e5d64e7153e3dc0534ba26eec4d79ed74165661b Mon Sep 17 00:00:00 2001 From: thisLight Date: Fri, 31 Dec 2021 20:12:43 +0800 Subject: [PATCH] pre-fetched wikipedia summary --- package.json | 4 ++- scripts/wikipedia.js | 78 ++++++++++++++++++++++++++++++++++++++++++-- yarn.lock | 17 ++++++++++ 3 files changed, 96 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index 394efe0..6a25584 100644 --- a/package.json +++ b/package.json @@ -12,6 +12,7 @@ "version": "6.0.0" }, "dependencies": { + "axios": "^0.24.0", "hexo": "^6.0.0", "hexo-feed": "^1.1.0", "hexo-generator-archive": "^1.0.0", @@ -23,7 +24,8 @@ "hexo-renderer-stylus": "^2.0.0", "hexo-server": "^2.0.0", "hexo-tag-steamgame": "^1.0.0", - "hexo-theme-landscape": "^0.0.3" + "hexo-theme-landscape": "^0.0.3", + "rate-limiter-flexible": "^2.3.6" }, "devDependencies": {} } diff --git a/scripts/wikipedia.js b/scripts/wikipedia.js index 366b0e6..46e3ff8 100644 --- a/scripts/wikipedia.js +++ b/scripts/wikipedia.js @@ -1,3 +1,6 @@ +// Requires axios and rate-limiter-flexible +var axios = require("axios"); +var RateLimiterMemory = require("rate-limiter-flexible").RateLimiterMemory; function buildArgsHash(args) { let argsHash = {}; @@ -9,6 +12,67 @@ function buildArgsHash(args) { return argsHash; } +let gRateLimiter = new RateLimiterMemory({ + points: 200, + duration: 1, +}); // Per wikipedia's recommendation, 200 reqs per second + +async function requestWikipediaRequest(baseUrl) { + while (true) { + try { + await gRateLimiter.consume(baseUrl, 1); + break; + } catch (limited) { + if (limited instanceof Error) { + throw limited; + } else { + await new Promise(resolve => { + setTimeout(resolve, limited.msBeforeNext); + }); + } + } + } +} + +async function requestWikipediaDone(baseUrl) { + await gRateLimiter.reward(baseUrl, 1); +} + +async function generatePrefetchedWikipediaTagHtml(args, content) { + const argsHash = buildArgsHash(args); + const title = argsHash['title']; + + const lang = argsHash['lang'] !== undefined ? argsHash['lang'] : 'en'; + const baseUrl = `https://${lang}.wikipedia.org`; + + const url = `${baseUrl}/api/rest_v1/page/summary/${title}`; + await requestWikipediaRequest(baseUrl); + let response = await axios.get(url, { + headers: { + 'accept': 'application/json; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/Summary/1.4.2"', + 'api-user-agent': `Hexo Wikipedia Tag (Prefetch, from ${hexo.config.url})`, + }, + responseType: 'json', + timeout: 3000, + params: { + redirect: false, + }, + transitional: { + silentJSONParsing: false, + }, + }).catch(reason => { + hexo.log.warn(`fetch failed for "${url}": ${reason}`); + return Promise.reject(reason); + }); + await requestWikipediaDone(baseUrl); + let extractedText = response.data.extract; + let contentText = extractedText; + if (argsHash['wikiButton'] === 'true') { + contentText += `

Wikipedia:${title}

`; + } + return `
${contentText}
` +} + function generateWikipediaTagHtml(args, content) { const argsHash = buildArgsHash(args); const title = argsHash['title']; @@ -18,7 +82,7 @@ function generateWikipediaTagHtml(args, content) { const url = `${baseUrl}/api/rest_v1/page/summary/${title}?redirect=false`; - const tagId = Math.round(Math.random() * 100000); + const tagId = "wikipedia-"+Math.round(Math.random() * 100000); const embeddedScript = ` window.addEventListener('load', function() { var element = document.getElementById('${tagId}'); @@ -34,15 +98,25 @@ function generateWikipediaTagHtml(args, content) { req.open('GET', '${url}'); req.responseType = 'json'; req.setRequestHeader('accept', 'application/json; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/Summary/1.4.2"'); + req.setRequestHeader('api-user-agent', 'Hexo Wikipedia Tag (from ${hexo.config.url})'); req.send(); }); `; let contentText = ``; if (argsHash['wikiButton'] === 'true') { contentText += `

Wikipedia:${title}

`; + } else { + contentText += ``; } return `
${contentText}
`; } -hexo.extend.tag.register('wikipedia', generateWikipediaTagHtml); +function wikipediaTag(args, content) { + // We use the client-fetching method as fallback + return generatePrefetchedWikipediaTagHtml(args, content).catch(reason => { + return generateWikipediaTagHtml(args, content); + }); +} + +hexo.extend.tag.register('wikipedia', wikipediaTag, {async: true}); diff --git a/yarn.lock b/yarn.lock index 9150c95..a94bc35 100644 --- a/yarn.lock +++ b/yarn.lock @@ -92,6 +92,13 @@ atob@^2.1.2: resolved "https://registry.yarnpkg.com/atob/-/atob-2.1.2.tgz#6d9517eb9e030d2436666651e86bd9f6f13533c9" integrity sha512-Wm6ukoaOGJi/73p/cl2GvLjTI5JM1k/O14isD73YML8StrH/7/lRFgmg8nICZgD3bZZvjwCGxtMOD3wWNAu8cg== +axios@^0.24.0: + version "0.24.0" + resolved "https://registry.yarnpkg.com/axios/-/axios-0.24.0.tgz#804e6fa1e4b9c5288501dd9dff56a7a0940d20d6" + integrity sha512-Q6cWsys88HoPgAaFAVUb0WpPk0O8iTeisR9IMqy9G8AbO4NlpVknrnQS03zzF9PGAWgO3cgletO3VjV/P7VztA== + dependencies: + follow-redirects "^1.14.4" + balanced-match@^1.0.0: version "1.0.2" resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee" @@ -429,6 +436,11 @@ finalhandler@1.1.2: statuses "~1.5.0" unpipe "~1.0.0" +follow-redirects@^1.14.4: + version "1.14.6" + resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.14.6.tgz#8cfb281bbc035b3c067d6cd975b0f6ade6e855cd" + integrity sha512-fhUl5EwSJbbl8AR+uYL2KQDxLkdSjZGR36xy46AO7cOMTrCMON6Sa28FmAnC2tRTDbd/Uuzz3aJBv7EBN7JH8A== + fresh@0.5.2: version "0.5.2" resolved "https://registry.yarnpkg.com/fresh/-/fresh-0.5.2.tgz#3d8cadd90d976569fa835ab1f8e4b23a105605a7" @@ -1048,6 +1060,11 @@ range-parser@~1.2.1: resolved "https://registry.yarnpkg.com/range-parser/-/range-parser-1.2.1.tgz#3cf37023d199e1c24d1a55b84800c2f3e6468031" integrity sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg== +rate-limiter-flexible@^2.3.6: + version "2.3.6" + resolved "https://registry.yarnpkg.com/rate-limiter-flexible/-/rate-limiter-flexible-2.3.6.tgz#b1a2549dca91069c8a33d57c08a27262c0356c60" + integrity sha512-8DVFOe89rreyut/vzwBI7vgXJynyYoYnH5XogtAKs0F/neAbCTTglXxSJ7fZeZamcFXZDvMidCBvps4KM+1srw== + readdirp@~3.6.0: version "3.6.0" resolved "https://registry.yarnpkg.com/readdirp/-/readdirp-3.6.0.tgz#74a370bd857116e245b29cc97340cd431a02a6c7"