I am tring to download around 20.000 images from a website by scraping it.
(I am authorized by the owner)
The path of the image is composed like this:
VolumeId/ChapterId/PageId.jpg
There are around 100 volumes, and every volume has x chapters, and every chapter has y pages.
In the database I have stored for every volume the number of chapters, but i don't have
the number of pages, so I have to navigate to the image url and check if it exists.
I know for sure that there are less than 30 pages per chapter so I did something like this:
let exists = true;
for (let i = 0; i < 30 && exists; i++) {
fetch(`imgUrl/${i}.jpg`)
.then(data => {
if (data.ok)
return data.arrayBuffer();
else
exists = false;
.then(arrayBuffer => {
if (exists) {
let buffer = Buffer.from(arrayBuffer );
if (!fs.existsSync(path.join(__dirname, imgPath))) {
fs.mkdirSync(path.join(__dirname, imgPath), {
recursive: true,
});
}
fs.writeFile(
path.join(__dirname, imgPath + "/" + img + ".jpg"),
buffer,
(err) => {
if (err) throw err;
}
);
}
});
}
The problem:
The problem is that the loop does not wait for the image to be fetched and saved locally.
I have tried with async/await and promises (I think I have implemented them wrong)
Is there a better way to download a large quantity of data? Maybe with streams?
It can be a little bit complicated to implement your code with just async/await and at the same time assure the "exists" condition between iterations, I suggest you use a class that implements an async iterator, refer to the official documentation for more details, the following code achieve what you are looking for (note: the code snippet you provided didn't show where "imgPath" is coming from so just fix my code accordingly) :
class FetchImages {
constructor(urls) {
this.urls = urls;
this.index = 0;
}
[Symbol.asyncIterator]() {
const urlsIterator = this.urls[Symbol.iterator]();
return {
async next() {
if (++index == 30) {
return {
done: true
};
}
const iteratorResult = urlsIterator.next();
if (iteratorResult.done) {
return { done: true };
}
const url = iteratorResult.value;
try {
let response = await fetch(url);
let data;
if (response.status == 200) {
data = await response.arrayBuffer();
} else {
// equivalent to exists == false, exit condition of the iterator
return {
done: true
};
};
let buffer = Buffer.from(data);
if (!fs.existsSync(path.join(__dirname, imgPath))) {
fs.mkdirSync(path.join(__dirname, imgPath), {
recursive: true,
});
}
fs.writeFileSync(
path.join(__dirname, imgPath),
buffer,
);
return {
done: false,
value: imgPath
};
} catch (err) {
return {
done: false,
value: err.message
};
}
}
}
}
}
(async function () {
const fetchImages = new FetchImages(urls);
for await (const fetchImage of fetchImages) {
// check status of each fetch
console.log(fetchImage);
}
})();
I'm trying to intigrate Stripe 3ds flow to the project. Everything goes ok, but when you do nothing after pop-up appearing for about 5 seconds it closes automaticly.
Here is a part of my code:
const sendServerReq = async (id) {
const { status, {data: payment_intent_secret} } = await purchaseRequest(id);
if (status === 'requires_action' && payment_intent_secret) {
const {
error: errorAction,
paymentIntent
} = await stripe.confirmCardPayment(payment_intent_secret);
if (errorAction) {
onError(errorAction.message);
return;
}
await sendServerReq(paymentIntent.id);
} else {
return onSuccess();
}
}
So I have been working quite a while with protractor and I have found out that I am having issue having error message etc. if I don't find a element by 60 sec then I will just get a thrown error for timeout. Which is not a really good way to see whats the issue actually and I am here asking you guys how I am able to put my own error message etc that this specific element has not been found or something like that.
I have coded something like this.
Test case class:
const userData = require("../globalContent.json");
const Page = require("../objects/ikeaProductPage.obj");
describe("Product page", function () {
ikeaPage = new Page();
for (let [link, amount] of Object.entries(userData[browser.baseUrl])) {
// The Ikea page is accessible by the specified URL
it(`Is defined by the URL: ${link}`,
async function() {
await Page.navigateDesktop(`${link}`);
});
// page has a quantity label and it can be filled out with user data
it("Has a label quantity that can receive user data",
async function() {
await Page.fillFormWithUserData(`${amount}`);
});
// Details page allows the user to add to cart
it("Enables resolution of added to cart",
async function() {
await Page.hasAddToShoppingCart();
});
// Details page allows the user to proceed to the next stage when page has been resolved
it("Allows the user to proceed to the next stage of add to cart",
async function() {
await Page.hasAddedToBag();
await browser.sleep(1000);
});
}
});
Object class:
const utils = require("../utils/utils");
const Specs = require("../specs/ProductPage.specs");
module.exports = class Page {
constructor() {
const _fields = {
amountInput: Specs.productAmount
};
const _formButtons = {
addToCart: ikeaSpecs.addToCart
};
const _productFrame = {
cartIcon: ikeaSpecs.cartIcon,
addedToCartIcon: Specs.addedToCart,
};
this.getFields = function() {
return _fields;
};
this.getFormButtons = function() {
return _formButtons;
};
this.getFrame = function() {
return _productFrame;
};
}
getForm() {
return {
fields: this.getFields(),
buttons: this.getFormButtons(),
};
}
getPageFrame() {
return {
buttons: {
iconFrames: this.getFrame()
}
};
}
//Navigate for Desktop
async navigateDesktop(URL) {
await browser.waitForAngularEnabled(false);
await browser.manage().window().maximize();
await browser.get(URL);
}
//Fill qty from globalContent.json
async fillFormWithUserData(amountQuantity) {
const formFields = this.getForm().fields.amountInput;
await formFields.clear();
await utils.sendKeys(formFields, amountQuantity);
}
//Check if we can add to shopping cart
async hasAddToShoppingCart() {
const formButton = this.getForm().buttons.addToCart;
await utils.elementToBeClickable(formButton);
await utils.click(formButton);
}
//Check if the product has been added
async hasAddedToBag() {
const frameCartIcon = this.getPageFrame().buttons.iconFrames.cartIcon;
const frameAddedToCart = this.getPageFrame().buttons.iconFrames.addedToCartIcon;
await utils.presenceOf(frameCartIcon);
await utils.elementToBeClickable(frameAddedToCart);
}
};
utils:
const utils = function () {
var EC = protractor.ExpectedConditions;
this.presenceOf = function (params) {
return browser.wait(EC.presenceOf(params));
};
this.elementToBeClickable = function (params) {
return browser.wait(EC.elementToBeClickable(params));
};
this.sendKeys = function (params, userData) {
return params.sendKeys(userData);
};
this.click = function (params) {
return browser.executeScript("arguments[0].click();", params.getWebElement());
};
this.switch = function (params) {
return browser.switchTo().frame(params.getWebElement());
};
this.switchDefault = function () {
return browser.switchTo().defaultContent();
};
};
module.exports = new utils();
and I wonder etc how I can set any more correctly errors instead of just timeouts?
Since you're using browser.wait under the hood, then you want to consider using one of it's parameters. As the page suggests, it takes 3 parameters, and all are useful:
browser.wait(
() => true, // this is your condition, to wait for (until the function returns true)
timeout, // default value is jasmineNodeOpts.defaultTimeoutInterval, but can be any timeout
optionalMessage // this is what you're looking for
)
updated
So if I use all three it'll look like this
this.presenceOf = function (params, message) {
return browser.wait(
EC.presenceOf(params),
jasmine.DEFAULT_TIMEOUT_INTERVAL,
`Element ${params.locator().toString()} is not present. Message: ${message}`
)
};
when you call it, like this
await utils.presenceOf(frameCartIcon, 10000, "frame should be populated");
and it fails, you'd get this stack
- Failed: Element By(css selector, "some css") is not present. Message: frame should be populated
Wait timed out after 1002ms
Wait timed out after 1002ms
at /Users/spleshakov/Documents/ui-automation/node_modules/selenium-webdriver/lib/promise.js:2201:17
at ManagedPromise.invokeCallback_ (/Users/spleshakov/Documents/ui-automation/node_modules/selenium-webdriver/lib/promise.js:1376:14)
at TaskQueue.execute_ (/Users/spleshakov/Documents/ui-automation/node_modules/selenium-webdriver/lib/promise.js:3084:14)
at TaskQueue.executeNext_ (/Users/spleshakov/Documents/ui-automation/node_modules/selenium-webdriver/lib/promise.js:3067:27)
at asyncRun (/Users/spleshakov/Documents/ui-automation/node_modules/selenium-webdriver/lib/promise.js:2927:27)
at /Users/spleshakov/Documents/ui-automation/node_modules/selenium-webdriver/lib/promise.js:668:7
at processTicksAndRejections (internal/process/next_tick.js:81:5)
From: Task: Element By(css selector, "some css") is not present. Message: frame should be populated
I have an Azure Function that inserts from JSON-LD into my GraphDB - however what I'm finding is my GraphDB keeps crashing, because the function is sending too many insert requests.
I've set "maxConcurrentRequests": 1 and "maxOutstandingRequests": 1 however it still doesn't seem to wait and process one at a time.
Could someone please explain why this so.
export async function onTrigger(context: Context, documents: Resource[] | null): Promise<void> {
if (documents == null) { return }
documents.forEach(async function (value: any) {
if ("metadata" in value) { } else { return; }
let song: MyDocument = value.metadata.songs.song;
// Create the JSON-LD object using the song object from above
let jsonld = ...
let nQuads = await jsonldParser.toRDF(jsonld, {format: 'application/n-quads'});
let insertQuery = "INSERT DATA {" + nQuads + "}";
try {
let res = await axios.post('http://localhost:7200/mygraphdb', "update="+encodeURIComponent(insertQuery))
if (res.status === 204) {
console.log(`All Done!`);
}
} catch (error) {
console.log(`Error! ${song.id}`);
}
});
}
I am getting those errors and warning in my console after trying to create a PWA - Progressive Web App out of my website using this tutorial.
The FetchEvent for
"https://www.googletagmanager.com/gtag/js?id=UA-4562443-3" resulted in
a network error response: the promise was rejected. Promise.then
(async) (anonymous) # service-worker.js:228 service-worker.js:1
Uncaught (in promise) fetch failed 1:21 GET
https://www.googletagmanager.com/gtag/js?id=UA-4562443-3
net::ERR_FAILED The FetchEvent for
"https://fonts.googleapis.com/css?family=Open+Sans:300,400&display=swap&subset=cyrillic"
resulted in a network error response: the promise was rejected.
Promise.then (async) (anonymous) # service-worker.js:228
service-worker.js:1 Uncaught (in promise) fetch failed 1:28 GET
https://fonts.googleapis.com/css?family=Open+Sans:300,400&display=swap&subset=cyrillic
net::ERR_FAILED The FetchEvent for
"https://widget.uservoice.com/VuHfPZ0etI2eQ4REt1tiUg.js" resulted in a
network error response: the promise was rejected. Promise.then (async)
(anonymous) # service-worker.js:228 service-worker.js:1 Uncaught (in
promise) fetch failed 1:894 GET
https://widget.uservoice.com/VuHfPZ0etI2eQ4REt1tiUg.js net::ERR_FAILED
It actually works pretty well. I am able to get a fully working PWA icon in Audits in Chrome Dev Tools. Which is great, but after a refresh I am getting all those errors. My service-worker.js which is located at root of my website looks like this
"use strict";
const SERVICE_WORKER_VERSION = "REPLACED_WITH_SERVICE_WORKER_VERSION"; // updated with tools/service_worker_version.js (String)
const CACHE_VERSION = SERVICE_WORKER_VERSION;
//const fileNamesToSaveInCache = ["/"];
const HOME = "/";
const OFFLINE_ALTERNATIVE = "/offline";
const fileNamesToSaveInCache = [];
const fileNamesToSaveInCacheProd = [
OFFLINE_ALTERNATIVE,
"/",
"/publics/img/favicon/fav.gif",
"/publics/css/style.css",
"/publics/css/searchhelp.css",
"/publics/css/Helpa.css",
];
const rtcLength = 4; // "rtc/".length;
const rtcFetchDelay = 10000;//ms
const origin = location.origin;
const answerFromfileName = {};
const resolveFromfileName = {};
const rejectFromfileName = {};
const timeOutIdFromfileName = {};
let logLater = [];
// todo put all into single container
const resolveFetchFromPeerToPeer = function (fileName) {
clearTimeout(timeOutIdFromfileName[fileName]);
resolveFromfileName[fileName](answerFromfileName[fileName]);
delete answerFromfileName[fileName];//stop listening
delete resolveFromfileName[fileName];
delete rejectFromfileName[fileName];
};
const rejectFetchFromPeerToPeer = function (fileName, reason) {
if (rejectFromfileName[fileName]) {
rejectFromfileName[fileName](reason);
delete resolveFromfileName[fileName];
delete rejectFromfileName[fileName];
}
};
const fetchFromPeerToPeer = function (customRequestObject) {
/*asks all page for a fileName*/
const fileName = customRequestObject.header.fileName;
const promise = new Promise(function (resolve, reject) {
resolveFromfileName[fileName] = resolve;
rejectFromfileName[fileName] = reject;
if (answerFromfileName.hasOwnProperty(fileName)) {
resolveFetchFromPeerToPeer(fileName);
}
timeOutIdFromfileName[fileName] = setTimeout(function() {
rejectFetchFromPeerToPeer(fileName, "No answer after 10 seconds");
}, rtcFetchDelay);
});
self.clients.matchAll().then(function(clientList) {
clientList.forEach(function(client) {
client.postMessage(customRequestObject);
});
});
return promise;
};
const logInTheUI = (function () {
//console.log("logInTheUI function exists");
return function (what) {
console.log(what);
self.clients.matchAll().then(function(clientList) {
clientList.forEach(function(client) {
client.postMessage({LOG: JSON.parse(JSON.stringify(what))});
});
});
};
}());
const logInTheUIWhenActivated = function (what) {
logLater.push(what);
};
const fetchFromMainServer = function (request, options = {}) {
/*wrap over fetch. The problem with fetch here, it doesn't reject properly sometimes
see if statement below*/
return fetch(request, options).then(function (fetchResponse) {
// console.log("fetchFromMainServer:", fetchResponse.ok, fetchResponse);
// logInTheUI([request, options]);
if ((!fetchResponse) || (!fetchResponse.ok)) {
return Promise.reject("fetch failed");
}
return fetchResponse;
});
};
const fetchFromCache = function (request) {
return caches.open(CACHE_VERSION).then(function (cache) {
return cache.match(request).then(function (CacheResponse) {
//console.log("fetchFromCache:", CacheResponse.ok, CacheResponse);
if ((!CacheResponse) || (!CacheResponse.ok)) {
return Promise.reject("Not in Cache");
}
return CacheResponse;
});
});
};
const isLocalURL = function (url) {
return !(String(url).match("rtc"));
};
const fillServiceWorkerCache2 = function () {
/*It will not cache and also not reject for individual resources that failed to be added in the cache. unlike fillServiceWorkerCache which stops caching as soon as one problem occurs. see http://stackoverflow.com/questions/41388616/what-can-cause-a-promise-rejected-with-invalidstateerror-here*/
return caches.open(CACHE_VERSION).then(function (cache) {
return Promise.all(
fileNamesToSaveInCache.map(function (url) {
return cache.add(url).catch(function (reason) {
return logInTheUIWhenActivated([url + "failed: " + String(reason)]);
});
})
);
});
};
const latePutToCache = function (request, response) {
return caches.open(CACHE_VERSION).then(function(cache) {
cache.put(request, response.clone());
return response;
});
};
const deleteServiceWorkerOldCache = function () {
return caches.keys().then(function (cacheVersions) {
return Promise.all(
cacheVersions.map(function (cacheVersion) {
if (CACHE_VERSION === cacheVersion) {
//console.log("No change in cache");
} else {
//console.log("New SERVICE_WORKER_VERSION of cache, delete old");
return caches.delete(cacheVersion);
}
})
);
});
};
const useOfflineAlternative = function () {
return fetchFromCache(new Request(OFFLINE_ALTERNATIVE));
};
const isAppPage = function (url) {
/*appPage does not work offline, and we don't serve it if offline
returns Boolean*/
return (origin + HOME) === url;
};
self.addEventListener("install", function (event) {
/*the install event can occur while another service worker is still active
waitUntil blocks the state (here installing) of the service worker until the
promise is fulfilled (resolved or rejected). It is useful to make the service worker more readable and more deterministic
save in cache some static fileNames
this happens before activation */
event.waitUntil(
fillServiceWorkerCache2()
.then(skipWaiting)
);
});
self.addEventListener("activate", function (event) {
/* about to take over, other service worker are killed after activate, syncronous
a good moment to clear old cache*/
event.waitUntil(deleteServiceWorkerOldCache().then(function() {
//console.log("[ServiceWorker] Skip waiting on install caches:", caches);
return self.clients.claim();
}));
});
self.addEventListener("message", function (event) {
const message = event.data;
/*
if (message.hasOwnProperty("FUTURE")) {
console.log(message.FUTURE);
return;
}
*/
const fileName = message.fileName;
const answer = message.answer;
answerFromfileName[fileName] = answer;
//console.log(fileName, answer, resolveFromfileName);
if (resolveFromfileName.hasOwnProperty(fileName)) {//
resolveFetchFromPeerToPeer(fileName);
}
});
self.addEventListener("fetch", function (fetchEvent) {
/* fetchEvent interface FetchEvent
see https://www.w3.org/TR/service-workers/#fetch-event-interface
IMPORTANT: fetchEvent.respondWith must be called inside this handler immediately
synchronously fetchEvent.respondWith must be called with a response object or a
promise that resolves with a response object. if fetchEvent.respondWith is called
later in a callback the browser will take over and asks the remote server directly, do not do that
why have fetchEvent.respondWith( and not respond with the return value of the callback function ?
-->
It allows to do other thing before killing the service worker, like saving stuff in cache
*/
const request = fetchEvent.request;//Request implements Body;
//const requestClone = request.clone(); //no need to clone ?
const url = request.url;
if (logLater) {
logLater.forEach(logInTheUI);
logLater = undefined;
}
// logInTheUI(["fetch service worker " + SERVICE_WORKER_VERSION, fetchEvent]);
// Needs to activate to handle fetch
if (isLocalURL(url)) {
//Normal Fetch
if (request.method === "POST") {
// logInTheUI(["POST ignored", request]);
return;
}
// logInTheUI(["Normal Fetch"]);
fetchEvent.respondWith(
fetchFromCache(request.clone()).then(function (cacheResponse) {
/* cannot use request again from here, use requestClone */
//console.log(request, url);
return cacheResponse;
}).catch(function (reason) {
// We don't have it in the cache, fetch it
// logInTheUI(fetchEvent);
return fetchFromMainServer(request);
}).then(function (mainServerResponse) {
if (isAppPage(url)) {
return mainServerResponse;
}
return latePutToCache(request, mainServerResponse).catch(
function (reason) {
/*failed to put in cache do not propagate catch, not important enough*/
return mainServerResponse;
}
);
}).catch(function (reason) {
if (isAppPage(url)) {
//if it is the landing page that is asked
return useOfflineAlternative();
//todo if we are offline , display /offline directly
}
return Promise.reject(reason);
})
);
} else {
// Peer to peer Fetch
//console.log(SERVICE_WORKER_VERSION, "rtc fetch" url:", fetchEvent.request.url);
// request, url are defined
const method = request.method;
const requestHeaders = request.headers;
//logInTheUI(["Special Fetch"]);
const customRequestObject = {
header: {
fileName: url.substring(url.indexOf("rtc/") + rtcLength),
method
},
body: ""
};
requestHeaders.forEach(function (value, key) {
//value, key correct order
//is there a standard way to use Object.assign with Map like iterables ?
//todo handle duplicates
//https://fetch.spec.whatwg.org/#terminology-headers
customRequestObject.header[key] = value;
});
//console.log(request);
fetchEvent.respondWith(
/*should provide the peer the full request*/
request.arrayBuffer().then(function (bodyAsArrayBuffer) {
const bodyUsed = request.bodyUsed;
if (bodyUsed && bodyAsArrayBuffer) {
customRequestObject.body = bodyAsArrayBuffer;
}
}).catch(function (reason) {
/*console.log("no body sent, a normal GET or HEAD request has no body",
reason);*/
}).then(function (notUsed) {
return fetchFromPeerToPeer(customRequestObject);
}).then(function (response) {
const responseInstance = new Response(response.body, {
headers: response.header,
status: response.header.status || 200,
statusText : response.header.statusText || "OK"
});
return responseInstance;
}).catch(function (error) {
const responseInstance = new Response(`<html><p>${error}</p></html>`,
{
headers: {
"Content-type": "text/html"
},
status: 500,
statusText : "timedout"
});
return responseInstance;
})
);
}
/*here we could do more with event.waitUntil()*/
});
I am guessing the problem comes from loading those external libraries. So, this is my code loading those libraries.
// Include the UserVoice JavaScript SDK (only needed once on a page)
UserVoice = window.UserVoice || [];
(function() {
var uv = document.createElement('script');
uv.type = 'text/javascript';
uv.async = true;
uv.src = '//widget.uservoice.com/VuHfPZ0etI2eQ4REt1tiUg.js';
var s = document.getElementsByTagName('script')[0];
s.parentNode.insertBefore(uv, s)
})();
//
// UserVoice Javascript SDK developer documentation:
// https://www.uservoice.com/o/javascript-sdk
//
// Set colors
UserVoice.push(['set', {
accent_color: '#448dd6',
trigger_color: 'white',
trigger_background_color: 'rgba(46, 49, 51, 0.6)'
}]);
// Identify the user and pass traits
// To enable, replace sample data with actual user traits and uncomment the line
UserVoice.push(['identify', {
//email: 'john.doe#example.com', // User’s email address
//name: 'John Doe', // User’s real name
//created_at: 1364406966, // Unix timestamp for the date the user signed up
//id: 123, // Optional: Unique id of the user (if set, this should not change)
//type: 'Owner', // Optional: segment your users by type
//account: {
// id: 123, // Optional: associate multiple users with a single account
// name: 'Acme, Co.', // Account name
// created_at: 1364406966, // Unix timestamp for the date the account was created
// monthly_rate: 9.99, // Decimal; monthly rate of the account
// ltv: 1495.00, // Decimal; lifetime value of the account
// plan: 'Enhanced' // Plan name for the account
//}
}]);
// Add default trigger to the bottom-right corner of the window:
UserVoice.push(['addTrigger', {mode: 'contact', trigger_position: 'bottom-right'}]);
// Or, use your own custom trigger:
//UserVoice.push(['addTrigger', '#id', { mode: 'contact' }]);
// Autoprompt for Satisfaction and SmartVote (only displayed under certain conditions)
UserVoice.push(['autoprompt', {}]);
});//ready
#import url('https://fonts.googleapis.com/css?family=Open+Sans:300,400&display=swap&subset=cyrillic');
#font-face {
font-family: 'fa-solid-900';
font-display: swap;
src: url(https://use.fontawesome.com/releases/v5.8.2/webfonts/fa-solid-900.woff2) format('woff2');
}
#font-face {
font-family: 'fa-brands-400';
font-display: swap;
src: url(https://use.fontawesome.com/releases/v5.8.2/webfonts/fa-brands-400.woff2) format('woff2');
}
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-number-3"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-number-3');
</script>
What should i do in order to fix those errors. This is my first try in PWA so i am lost.
I end up using Workbox and everything is working great now.