I want to create a task that only builds the entire project if something changed. For that, I am comparing hashes (irrelevant to the question).
const buildIfChanged = async () => {
  const hash = await getHash();
  const newHash = await getNewHash();

  if (hash !== newHash) {
    console.log("START");
    const task = series(build, cleanup)();
    console.log("END", task);
  }
};
In this example, task is undefined, so I cannot add a .on("end", ...) and resolve the promise after that. I also cannot await it.
The problem is, because I am not waiting for it to complete, the buildIfChanged task completes before build even has a chance to run.
Is there any way to do this with modern gulpfiles?
I have found a way to do this in modern gulpfiles. The function returned by parallel and series actually takes a done callback as a parameter.
So to solve this, you can do:
const buildIfChanged = async () => {
  return new Promise(async (res, rej) => {
    try {
      const hash = await getHash();
      const newHash = await getNewHash();

      if (hash !== newHash) {
        console.log("START");
        const task = series(build, cleanup)(() => res()); // add the done callback here
        console.log("END", task);
      } else {
        res(); // also resolve when nothing changed, otherwise the promise never settles
      }
    } catch (e) {
      rej(e);
    }
  });
};
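If you'd rather avoid the async executor inside new Promise, a minimal sketch of the same idea (assuming the function returned by series follows the usual Node-style done(err) callback convention) is to await the hashes first and only wrap the series call in a promise:

const buildIfChanged = async () => {
  const hash = await getHash();
  const newHash = await getNewHash();

  if (hash !== newHash) {
    // series() returns a function that takes a done callback;
    // wrap just that call so buildIfChanged can be awaited
    await new Promise((res, rej) =>
      series(build, cleanup)(err => (err ? rej(err) : res()))
    );
  }
};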
I have a Node.js cluster with a primary that handles worker cycles (in the while loop) and listens to worker messages to progress through the cycle.
(In my code, index.js does not send messages on a setInterval but on another type of event; I have simplified the code here to get to the essence of the problem.)
Server.js
var cluster = require('cluster');
const ClusterMessages = require('cluster-messages');
const messages = new ClusterMessages();
if (cluster.isMaster) {
  let worker = cluster.fork();
  console.log(cluster);

  (async () => {
    let cycle = 0;
    while (true) {
      console.log(cycle);
      cycle++;
      await Promise.all([
        enough(),
      ]);
    }

    function enough() {
      return new Promise(resolve => {
        messages.on('enough', () => {
          console.log('enough');
          resolve();
        });
      });
    }
  })();
} else {
  require('./index.js');
}
Index.js
const ClusterMessages = require('cluster-messages');
const messages = new ClusterMessages();
setInterval(() => {
  messages.send('enough');
}, 1000);
The code works fine (as such, both in this example and in my real code), but there seems to be a memory leak, as you can see from the output of this code:
0
enough
1
enough
enough
2
enough
enough
enough
3
enough
enough
enough
enough...
I tried several things, like swapping new Promise and messages.on() and adding a return in the callback of the promise, but I have no clue what is happening here. Any ideas?
The solution is to use another event that can be listened to just once, unlike the 'on' event listener of the cluster-messages package.
Server.js
var cluster = require('cluster');
const ClusterMessages = require('cluster-messages');
const messages = new ClusterMessages();

if (cluster.isMaster) {
  let worker = cluster.fork();
  console.log(cluster);

  // Importing events
  const EventEmitter = require('events');
  const eventEmitter = new EventEmitter();

  // Re-emit every cluster message on a local emitter that supports once()
  messages.on('enough', () => {
    eventEmitter.emit('event');
  });

  (async () => {
    let cycle = 0;
    while (true) {
      console.log(cycle);
      cycle++;
      await Promise.all([
        enough(),
      ]);
    }

    function enough() {
      return new Promise(resolve => {
        eventEmitter.once('event', () => {
          console.log('event');
          resolve();
        });
      });
    }
  })();
} else {
  require('./index.js');
}
Index.js
const ClusterMessages = require('cluster-messages');
const messages = new ClusterMessages();
setInterval(() => {
  messages.send('enough');
}, 1000);
Every call of enough() installs another listener for the enough event on messages. They never get removed, leaking memory (and leading to an increasing number of logs per event). Instead, use the once method to install the listener:
function enough() {
  return new Promise(resolve => {
    messages.once('enough', () => {
    //       ^^^^
      console.log('enough');
      resolve();
    });
  });
}
Or even simpler, using the once helper from the events module:
const { once } = require('events');

function enough() {
  return once(messages, 'enough');
}
In your particular example, I would recommend not using promises to handle the events. You might even miss events that fire while you are removing and re-attaching a listener. Just write
let cycle = 0;
messages.on('enough', () => {
  console.log(cycle);
  cycle++;
});
If for some reason you need a loop that you can break from or await other things in, I would recommend an asynchronous iterator, built with on:
const { on } = require('events');

(async () => {
  let cycle = 0;
  for await (const _ of on(messages, 'enough')) {
    console.log(cycle);
    cycle++;
  }
})();
I'm using Node.js and TypeScript with async/await.
This is my test case:
async function doSomethingInSeries() {
  const res1 = await callApi();
  const res2 = await persistInDB(res1);
  const res3 = await doHeavyComputation(res1);
  return 'simle';
}
I'd like to set a timeout for the overall function. I.e. if res1 takes 2 seconds, res2 takes 0.5 seconds, and res3 takes 5 seconds, I'd like a timeout that lets me throw an error after 3 seconds.
With a normal setTimeout call this is a problem because the scope is lost:
async function doSomethingInSeries() {
  const timerId = setTimeout(function() {
    throw new Error('timeout');
  });
  const res1 = await callApi();
  const res2 = await persistInDB(res1);
  const res3 = await doHeavyComputation(res1);
  clearTimeout(timerId);
  return 'simle';
}
And I cannot catch it with normal Promise.catch:
doSomethingInSeries().catch(function(err) {
  // errors in res1, res2, res3 will be caught here
  // but the setTimeout error is not!!
});
Any ideas on how to resolve?
You can use Promise.race to make a timeout:
Promise.race([
  doSomethingInSeries(),
  new Promise((_, reject) => setTimeout(() => reject(new Error('timeout')), 11.5e3))
]).catch(function(err) {
  // errors in res1, res2, res3 and the timeout will be caught here
});
You cannot use setTimeout without wrapping it in a promise.
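If you need this in several places, a small reusable wrapper (a minimal sketch; the withTimeout name is just illustrative, and it assumes Promise.prototype.finally is available) that also clears the timer once the race settles could look like this:

function withTimeout(promise, ms) {
  let timerId;
  const timeout = new Promise((_, reject) => {
    timerId = setTimeout(() => reject(new Error('timeout')), ms);
  });
  // Clear the timer however the race settles, so the pending timeout does not keep the process alive
  return Promise.race([promise, timeout]).finally(() => clearTimeout(timerId));
}

withTimeout(doSomethingInSeries(), 3000)
  .then(res => console.log(res))
  .catch(err => console.error(err));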
Ok I found this way:
async function _doSomethingInSeries() {
  const res1 = await callApi();
  const res2 = await persistInDB(res1);
  const res3 = await doHeavyComputation(res1);
  return 'simle';
}

async function doSomethingInSeries(): Promise<any> {
  let timeoutId;
  const delay = new Promise(function(resolve, reject) {
    timeoutId = setTimeout(function() {
      reject(new Error('timeout'));
    }, 1000);
  });

  // overall timeout
  return Promise.race([delay, _doSomethingInSeries()])
    .then((res) => {
      clearTimeout(timeoutId);
      return res;
    });
}
Does anyone see any errors?
The thing that smells a bit to me is that using Promises as the asynchronicity strategy makes us allocate more objects than some other strategies would, but that is off-topic.
The problem with @Bergi's answer is that doSomethingInSeries continues executing even after you have already rejected the promise. It is much better to cancel it.
LATEST ANSWER
You can try using AbortController for that. Check the old answer to see how to use it - the API is similar.
Keep in mind that the task is not cancelled immediately, so the continuation (awaiting, then or catch) is not called exactly at the timeout.
To guarantee that, you can combine this with @Bergi's approach.
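A minimal sketch of the AbortController variant (assuming a runtime where AbortController and signal.throwIfAborted() exist; on older Node.js versions, check signal.aborted and throw manually):

const doSomethingInSeries = async (signal) => {
  signal.throwIfAborted();
  const res1 = await callApi();
  signal.throwIfAborted();
  const res2 = await persistInDB(res1);
  signal.throwIfAborted();
  const res3 = await doHeavyComputation(res1);
  return 'simle';
};

const controller = new AbortController();
setTimeout(() => controller.abort(), 5000);

try {
  await doSomethingInSeries(controller.signal);
} catch (error) {
  if (controller.signal.aborted) {
    // handle cancellation
  }
}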
OLD ANSWER
This is how it should look:
const doSomethingInSeries = async (cancellationToken) => {
  cancellationToken.throwIfCancelled();
  const res1 = await callApi();
  cancellationToken.throwIfCancelled();
  const res2 = await persistInDB(res1);
  cancellationToken.throwIfCancelled();
  const res3 = await doHeavyComputation(res1);
  cancellationToken.throwIfCancelled();
  return 'simle';
};
Here is a simple implementation:
const makeCancellationToken = (tag) => {
  let cancelled = false;
  return {
    isCancelled: () => cancelled,
    cancel: () => {
      cancelled = true;
    },
    throwIfCancelled: () => {
      if (cancelled) {
        const error = new Error(`${tag ?? 'Task'} cancelled`);
        error.cancelled = true;
        throw error;
      }
    }
  };
};
And finally usage:
const cancellationToken = makeCancellationToken('doSomething');
setTimeout(cancellationToken.cancel, 5000);

try {
  await doSomethingInSeries(cancellationToken);
} catch (error) {
  if (error.cancelled) {
    // handle cancellation
  }
}
Keep in mind that the task is not cancelled immediately, so the continuation (awaiting, then or catch) is not called exactly after 5 seconds.
To guarantee that, you can combine this with @Bergi's approach.
I have a quick question here. Is there a way to stop a for loop and wait for one iteration to be done before the next one can execute? I'll try to be as clear as possible, so apologies if my question and my code aren't clear enough. Plus, I will give more explanation for whatever clues you can give me. Much appreciated.
So I am trying to pass each ID to my API in order to trigger the script to be run. However, the requirement is that one script has to be done before the next one can be triggered.
I was messing around with setTimeout(), but it doesn't give me what I need. And sorry guys, my project doesn't allow me to work with any external libraries or with async/await.
function sampleScriptRunning(activity) {
  var ID = activity.map(function(activity) { return activity.ID; });
  for (var i = 0; i < ID.length; i++) {
    if (ID[i] === 'undefined') {
      console.log('No more scripts to run');
      return;
    }
    Api.getActivityStatus(ID[i]).then(function(response) {
      return $http.post('/proxy/api/nodejsjob/' + response.sample.sample[0].ScriptId + '/run');
    });
  }
}
If you're trying to manage asynchronous calls in series, you have two options - Promise objects, or managing callbacks yourself.
One powerful tool you can use is the mapSeries functionality of bluebird, an npm module for promise management. That way, you can execute the commands only one at a time without any other management:
const Promise = require('bluebird');

const ids = activities.map((activity) => activity.id);
return Promise.mapSeries(ids, (id) => {
  return Api.getActivityStatus(id)
    .then((response) => $http.post('/proxy/api/nodejsjob/' + response.sample.sample[0].ScriptId + '/run'));
});
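Since the question rules out external libraries and async/await, the same series behaviour can also be sketched with only plain promises and Array.prototype.reduce (the runInSeries name is just illustrative; Api and $http are the objects from the question):

function runInSeries(activity) {
  var ids = activity.map(function(item) { return item.ID; });
  return ids.reduce(function(previous, id) {
    // Wait for the previous script to finish before triggering the next one
    return previous.then(function() {
      return Api.getActivityStatus(id).then(function(response) {
        return $http.post('/proxy/api/nodejsjob/' + response.sample.sample[0].ScriptId + '/run');
      });
    });
  }, Promise.resolve());
}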
Here is how you can wrap your work in an async IIFE and then use the await statement to serialize your async calls.
// This is just a dummy async function; it resolves after a random delay (to simulate true async timing issues)
function doAsync(i) {
  return new Promise((resolve, reject) => {
    setTimeout(() => { resolve(`here is - ${i}`); }, Math.random() * 1000);
  });
}

// This function, when called, will execute an IIFE to allow us to use await.
function workSyncronously() {
  (async () => {
    for (let i = 0; i < 10; i++) {
      let message = await doAsync(i);
      console.log(message);
    }
  })();
}

workSyncronously();
So let's look at something where we need the results of two functions before continuing:
A slight tweak to the code: now we are using the result of our first call to drive the second call.
// This is just a dummy async function; it resolves after a random delay (to simulate true async timing issues)
function doAsync(i) {
  return new Promise((resolve, reject) => {
    setTimeout(() => { resolve(i * 2); }, Math.random() * 1000);
  });
}

// This function, when called, will execute an IIFE to allow us to use await.
function workSyncronously() {
  (async () => {
    for (let i = 0; i < 10; i++) {
      let timeTwo = await doAsync(i);
      let timeFour = await doAsync(timeTwo);
      console.log(`${i} *2=${timeTwo} *4=${timeFour}`);
    }
  })();
}

workSyncronously();
So your code might look something like this:
function sampleScriptRunning(activity) {
  const IDArray = activity.map(({ ID }) => ID);
  const postResponses = [];
  return (async () => {
    for (let id of IDArray) {
      let response = await Api.getActivityStatus(id);
      let postResponse = await $http.post('/proxy/api/nodejsjob/' + response.sample.sample[0].ScriptId + '/run');
      postResponses.push(postResponse);
    }
    return postResponses;
  })();
}
I have a stream and I need to convert it to a generator, so an uploader can consume the generic generator.
This means turning:
stream.on('data', chunk => ...);
to:
generator = streamGenerator(stream);
chunk = await generator.next()
...
better yet:
chunk = yield streamGenerator;
Overall my best attempt requires leaking the resolve from a promise and I'd like to avoid that:
function streamToIterable(chunkSize, stream) {
  let collector = [];
  let value = [];
  let done = false;
  let _resolve;
  let promise = new Promise(resolve => _resolve = resolve);

  stream.on('data', chunk => {
    collector = collector.concat(chunk);
    if (collector.length >= chunkSize) {
      value = collector.splice(0, chunkSize);
      _resolve(value);
      stream.pause();
    }
  });
  stream.on('end', () => {
    _resolve(collector);
    // With done set to true, the next iteration will ignore 'value' and end the loop
    done = true;
  });
  stream.resume();

  return {
    next: () => ({
      value: promise.then(chunk => {
        stream.resume();
        promise = new Promise(resolve => _resolve = resolve);
        return chunk;
      }),
      done,
    }),
  };
}
function* streamToGenerator(chunkSize, stream) {
  const iterator = streamToIterable(chunkSize, stream);
  let next = iterator.next();
  while (!next.done) {
    yield next.value;
    next = iterator.next();
  }
}
Usage in a generator for uploading chunks:
for (const chunkData of generator()) {
  let result = yield uploadPost(url, formData, onChunkProgress(chunkIndex));
}
This is in a redux-saga, so "next()" isn't called on the generator until the returned promise is resolved.
You cannot avoid storing the resolve function in a mutable variable if you want to use a single event listener that resolves different promises. You could simplify the promise creation by using the once method similar to the following:
function streamToIterator(stream) {
  let done = false;
  const end = new Promise(resolve => {
    stream.once('end', resolve);
  }).then(e => {
    done = true;
  });
  return {
    [Symbol.iterator]() { return this; },
    next() {
      const promise = new Promise(resolve => {
        stream.once('data', value => {
          resolve(value);
          stream.pause();
        });
        stream.resume();
      });
      return {
        value: Promise.race([promise, end]),
        done,
      };
    },
  };
}
Of course, in your code you are doing the racing between end and data yourself, you resume the stream before next is called the first time, and most importantly you do the chunking yourself, so this might not be directly applicable to your situation.
Apart from that, I'd recommend checking out the buffering internals of Node.js streams; it might be easier to read chunks of certain sizes using a lower-level API than data events.
Also, you should definitely have a look at the asynchronous iteration proposal for ES-next. The iterable interface you're trying to implement is very similar, and surely they either already have, or really would welcome, an example of making a Node readable stream iterable.
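For reference, current Node.js versions already implement Symbol.asyncIterator on readable streams, so (a minimal sketch, assuming a recent Node.js; the file path is just illustrative) a stream can be consumed directly with for await:

const fs = require('fs');

async function consume(stream) {
  for await (const chunk of stream) {
    // each chunk is a Buffer (or a string, depending on the stream's encoding)
    console.log(chunk.length);
  }
}

consume(fs.createReadStream('./some-file'));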
EDIT: this answer is only required if you have a volatile stream that doesn't pause right away, and therefore also doesn't have an event system that supports "once". It also allows asynchronous yielding.
I greatly changed my previous answer and this one works.
This one uses two arrays: one of promises and another of resolve functions, which gives a bi-directional queue of data.
So if you iterate faster than the stream, all pending promises will resolve as data comes in, and if the stream is faster than you iterate, you'll have resolved promises waiting in the queue.
function streamToAsyncIterator(chunkSize, stream) {
  let done = false;
  let endPromise = new Promise(resolve => {
    // signal that the stream has ended
    stream.on('end', () => {
      resolve({ value: undefined, done: true });
    });
  });

  // two-track queue for expecting and sending data with promises
  let dataPromises = [];
  let dataResolves = [];
  stream.on('data', value => {
    const dataResolve = dataResolves.shift();
    if (dataResolve) {
      dataResolve({ value, done: false });
    } else {
      dataPromises.push(Promise.resolve({ value, done: false }));
    }
    stream.pause();
  });

  return {
    [Symbol.asyncIterator]() {
      return this;
    },
    // TODO handle return() to close the stream
    next() {
      if (done) return Promise.resolve({ done });
      stream.resume();
      let dataPromise = dataPromises.shift();
      if (!dataPromise) {
        dataPromise = new Promise(resolve => dataResolves.push(resolve));
      }
      return Promise.race([dataPromise, endPromise])
        // done must be set in the resolution of the race, so that later calls to next() finish immediately once the stream has ended.
        .then(next => {
          if (next.done) {
            done = true;
          }
          return next;
        });
    },
  };
}
async function* streamToAsyncGenerator(chunkSize, stream) {
  const iterator = streamToAsyncIterator(chunkSize, stream);
  let next = await iterator.next();
  while (!next.done) {
    yield next.value;
    // The delete is needed to release resources;
    // without it, you'll get a memory error at 2GB.
    delete next.value;
    next = await iterator.next();
  }
}
EDIT: I removed the collector, which has nothing to do with the question, and I added the delete, which is necessary because GC doesn't appear to run with an array of iterators. This should be the final answer, as it works well for me.
I am trying to scrape some links with headless-chrome/puppeteer while scrolling down like this:
let interval;

const linkScraper = async () => {
  return new Promise(async (resolve, reject) => {
    interval = setInterval(async () => {
      const visiblePosts = await page.$$("div[class*='wrapper']");
      const data = await handleVisiblePosts(visiblePosts);
      allPosts = {...allPosts, ...data};
      await scroll();
      const stop = await areWeAtTheBottom();
      if (stop) {
        console.log('STOPPING');
        clearInterval(interval);
        resolve();
      }
    }, 100);
  });
};
The problem? clearInterval doesn't actually stop the interval: STOPPING gets printed multiple times.
I suspect it could also be because the setInterval callback is async, which it needs to be in order to use await.
I can find the following possible reasons why your interval would not get stopped:
You are never getting to the stop condition.
You are overwriting the interval variable somehow so the actual interval you want to stop is no longer saved.
You are getting a rejected promise.
There does not appear to be any reason why the interval variable needs to be outside the linkScraper function, and putting it inside the function will prevent it from being overwritten in any way.
With this many await calls, it seems wise to add a try/catch to catch any rejected promises and stop the interval if there's an error.
If you see STOPPING being logged, then you are apparently hitting the stop condition, so it appears it would have to be an overwritten interval variable.
Here's a version that cannot overwrite the interval variable and makes a few other changes for code cleanliness:
const linkScraper = async () => {
  return new Promise((resolve, reject) => {
    const interval = setInterval(async () => {
      try {
        const visiblePosts = await page.$$("div[class*='wrapper']");
        const data = await handleVisiblePosts(visiblePosts);
        allPosts = { ...allPosts, ...data };
        await scroll();
        const stop = await areWeAtTheBottom();
        if (stop) {
          console.log('STOPPING');
          clearInterval(interval);
          resolve();
        }
      } catch (e) {
        clearInterval(interval);
        reject(e);
      }
    }, 100);
  });
};
In cleaning up this code, I ran into a couple questions:
Do all four of your functions that you use await with actually return a promise?
And, where is allPosts declared?
EDIT: Just discovered another issue. setInterval() isn't aware of the await calls inside your function. Remember, the outer function doesn't actually block; it returns as soon as you hit an await. That means you can get another setInterval() callback while you are still processing the async operations of the first one. That will mess things up. Here's a way around that:
function delay(t) {
  return new Promise(resolve => {
    setTimeout(resolve, t);
  });
}

const linkScraper = () => {
  console.log("starting linkScraper");

  async function run() {
    const visiblePosts = await page.$$("div[class*='wrapper']");
    const data = await handleVisiblePosts(visiblePosts);
    allPosts = { ...allPosts, ...data };
    await scroll();
    const stop = await areWeAtTheBottom();
    if (stop) {
      console.log('STOPPING');
      return "stop";
    }
    return "continue";
  }

  // run one pass, then wait 100ms and go again until run() says "stop"
  function runLoop() {
    return run().then(result => {
      if (result === "continue") {
        return delay(100).then(runLoop);
      }
    });
  }

  return runLoop();
};
I accepted jfriend00's solution as it pointed me in the right direction. My slightly modified, final and working version looks like this:
const linkScraper = async () => {
  return new Promise(async (resolve, reject) => {
    const run = async () => {
      console.log("running");
      const visiblePosts = await page.$$("div[class*='wrapper']");
      const data = await handleVisiblePosts(visiblePosts);
      allPosts = {...allPosts, ...data};
      await scroll();
      const stop = await areWeAtTheBottom();
      if (stop) {
        console.log('STOPPING');
        resolve();
      } else {
        await page.waitFor(100);
        await run();
      }
    };
    await run();
  });
};