I am tring to download around 20.000 images from a website by scraping it.
(I am authorized by the owner)
The path of the image is composed like this:
VolumeId/ChapterId/PageId.jpg
There are around 100 volumes, and every volume has x chapters, and every chapter has y pages.
In the database I have stored for every volume the number of chapters, but i don't have
the number of pages, so I have to navigate to the image url and check if it exists.
I know for sure that there are less than 30 pages per chapter so I did something like this:
let exists = true;
for (let i = 0; i < 30 && exists; i++) {
fetch(`imgUrl/${i}.jpg`)
.then(data => {
if (data.ok)
return data.arrayBuffer();
else
exists = false;
.then(arrayBuffer => {
if (exists) {
let buffer = Buffer.from(arrayBuffer );
if (!fs.existsSync(path.join(__dirname, imgPath))) {
fs.mkdirSync(path.join(__dirname, imgPath), {
recursive: true,
});
}
fs.writeFile(
path.join(__dirname, imgPath + "/" + img + ".jpg"),
buffer,
(err) => {
if (err) throw err;
}
);
}
});
}
The problem:
The problem is that the loop does not wait for the image to be fetched and saved locally.
I have tried with async/await and promises (I think I have implemented them wrong)
Is there a better way to download a large quantity of data? Maybe with streams?
It can be a little bit complicated to implement your code with just async/await and at the same time assure the "exists" condition between iterations, I suggest you use a class that implements an async iterator, refer to the official documentation for more details, the following code achieve what you are looking for (note: the code snippet you provided didn't show where "imgPath" is coming from so just fix my code accordingly) :
class FetchImages {
constructor(urls) {
this.urls = urls;
this.index = 0;
}
[Symbol.asyncIterator]() {
const urlsIterator = this.urls[Symbol.iterator]();
return {
async next() {
if (++index == 30) {
return {
done: true
};
}
const iteratorResult = urlsIterator.next();
if (iteratorResult.done) {
return { done: true };
}
const url = iteratorResult.value;
try {
let response = await fetch(url);
let data;
if (response.status == 200) {
data = await response.arrayBuffer();
} else {
// equivalent to exists == false, exit condition of the iterator
return {
done: true
};
};
let buffer = Buffer.from(data);
if (!fs.existsSync(path.join(__dirname, imgPath))) {
fs.mkdirSync(path.join(__dirname, imgPath), {
recursive: true,
});
}
fs.writeFileSync(
path.join(__dirname, imgPath),
buffer,
);
return {
done: false,
value: imgPath
};
} catch (err) {
return {
done: false,
value: err.message
};
}
}
}
}
}
(async function () {
const fetchImages = new FetchImages(urls);
for await (const fetchImage of fetchImages) {
// check status of each fetch
console.log(fetchImage);
}
})();
Related
I have some code like so:
export async function handleRefresh() {
if (!existsSync('postr.toml')) fail('not a postr directory');
const posts = expandGlob('posts/*');
for await (const post of posts) {
if (!post.isDirectory) {
console.warn('warning: non-folder found in posts directory');
continue;
}
let {parsedFrontMatter, contents} = extractFrontMatter(await read(post.path + '/post.md'));
const adapters = parsedFrontMatter.adapters ?? [];
if (!parsedFrontMatter) {
fail('no frontmatter for ' + post.path);
continue;
}
if (!Array.isArray(adapters)) {
fail('adapters is not an array');
continue;
}
if (isValidFrontMatter(parsedFrontMatter)) {
fail('frontmatter is not valid');
continue;
}
adapters.forEach(async (adapter: string) => {
const adapterPlugins = parseToml(await read('postr.toml')).adapterPlugins ?? {};
if (!isObject(adapterPlugins)) {
fail('adapterPlugins in the configuration is not an object');
return;
}
const adapterPath = adapterPlugins[adapter];
if (!adapterPath) {
console.warn('warn: an adapter was set' +
'but the corresponding plugin was not configured in `postr.toml`. Skipping');
return;
}
if (!('path' in <any>adapterPath)) {
fail(`adapter ${adapter} does not have a path`);
return;
}
import((<any>adapterPath).path)
.then(async module => {
const action = getActionForPost(parsedFrontMatter);
if (module[action]) {
await module[action](contents, parsedFrontMatter, (<any>adapterPath).config, {
updateFrontMatter(newData: {[x: string]: any}) {
parsedFrontMatter = Object.assign(parsedFrontMatter, newData);
},
mapID(remote: string | number) {
addMapping(parsedFrontMatter.id as string, remote.toString(), adapter);
}
})
} else {
console.warn(`Adapter ${adapter} does not support action \`${action}\``);
return;
}
writeFinalContents(parsedFrontMatter, contents, post.path)
})
.catch(error => fail(`could not run adapter because of ${error.name}: ${error.message}`));
});
}
}
Huge function.
There are a lot of these necessary if checks. 3/4 of the function is if checks, you could say. I want some advice on how I could refactor these statements.
As you can see the checks are not always the same, there are some different checks going on there.
EDIT: I've added real code.
I'm using an HTTP-triggered Firebase cloud function to make an HTTP request. I get back an array of results (events from Meetup.com), and I push each result to the Firebase realtime database. But for each result, I also need to make another HTTP request for one additional piece of information (the category of the group hosting the event) to fold into the data I'm pushing to the database for that event. Those nested requests cause the cloud function to crash with an error that I can't make sense of.
const functions = require("firebase-functions");
const admin = require("firebase-admin");
admin.initializeApp();
const request = require('request');
exports.foo = functions.https.onRequest(
(req, res) => {
var ref = admin.database().ref("/foo");
var options = {
url: "https://api.meetup.com/2/open_events?sign=true&photo-host=public&lat=39.747988&lon=-104.994945&page=20&key=****",
json: true
};
return request(
options,
(error, response, body) => {
if (error) {
console.log(JSON.stringify(error));
return res.status(500).end();
}
if ("results" in body) {
for (var i = 0; i < body.results.length; i++) {
var result = body.results[i];
if ("name" in result &&
"description" in result &&
"group" in result &&
"urlname" in result.group
) {
var groupOptions = {
url: "https://api.meetup.com/" + result.group.urlname + "?sign=true&photo-host=public&key=****",
json: true
};
var categoryResult = request(
groupOptions,
(groupError, groupResponse, groupBody) => {
if (groupError) {
console.log(JSON.stringify(error));
return null;
}
if ("category" in groupBody &&
"name" in groupBody.category
) {
return groupBody.category.name;
}
return null;
}
);
if (categoryResult) {
var event = {
name: result.name,
description: result.description,
category: categoryResult
};
ref.push(event);
}
}
}
return res.status(200).send("processed events");
} else {
return res.status(500).end();
}
}
);
}
);
The function crashes, log says:
Error: Reference.push failed: first argument contains a function in property 'foo.category.domain._events.error' with contents = function (err) {
if (functionExecutionFinished) {
logDebug('Ignoring exception from a finished function');
} else {
functionExecutionFinished = true;
logAndSendError(err, res);
}
}
at validateFirebaseData (/user_code/node_modules/firebase-admin/node_modules/#firebase/database/dist/index.node.cjs.js:1436:15)
at /user_code/node_modules/firebase-admin/node_modules/#firebase/database/dist/index.node.cjs.js:1479:13
at Object.forEach (/user_code/node_modules/firebase-admin/node_modules/#firebase/util/dist/index.node.cjs.js:837:13)
at validateFirebaseData (/user_code/node_modules/firebase-admin/node_modules/#firebase/database/dist/index.node.cjs.js:1462:14)
at /user_code/node_modules/firebase-admin/node_modules/#firebase/database/dist/index.node.cjs.js:1479:13
at Object.forEach (/user_code/node_modules/firebase-admin/node_modules/#firebase/util/dist/index.node.cjs.js:837:13)
at validateFirebaseData (/user_code/node_modules/firebase-admin/node_modules/#firebase/database/dist/index.node.cjs.js:1462:14)
at /user_code/node_modules/firebase-admin/node_modules/#firebase/database/dist/index.node.cjs.js:1479:13
at Object.forEach (/user_code/node_modules/firebase-admin/node_modules/#firebase/util/dist/index.node.cjs.js:837:13)
at validateFirebaseData (/user_code/node_modules/firebase-admin/node_modules/#firebase/database/dist/index.node.cjs.js:1462:14)
If I leave out the bit for getting the group category, the rest of the code works fine (just writing the name and description for each event to the database, no nested requests). So what's the right way to do this?
I suspect this issue is due to the callbacks. When you use firebase functions, the exported function should wait on everything to execute or return a promise that resolves once everything completes executing. In this case, the exported function will return before the rest of the execution completes.
Here's a start of something more promise based -
const functions = require("firebase-functions");
const admin = require("firebase-admin");
admin.initializeApp();
const request = require("request-promise-native");
exports.foo = functions.https.onRequest(async (req, res) => {
const ref = admin.database().ref("/foo");
try {
const reqEventOptions = {
url:
"https://api.meetup.com/2/open_events?sign=true&photo-host=public&lat=39.747988&lon=-104.994945&page=20&key=xxxxxx",
json: true
};
const bodyEventRequest = await request(reqEventOptions);
if (!bodyEventRequest.results) {
return res.status(200).end();
}
await Promise.all(
bodyEventRequest.results.map(async result => {
if (
result.name &&
result.description &&
result.group &&
result.group.urlname
) {
const event = {
name: result.name,
description: result.description
};
// get group information
const groupOptions = {
url:
"https://api.meetup.com/" +
result.group.urlname +
"?sign=true&photo-host=public&key=xxxxxx",
json: true
};
const categoryResultResponse = await request(groupOptions);
if (
categoryResultResponse.category &&
categoryResultResponse.category.name
) {
event.category = categoryResultResponse.category.name;
}
// save to the databse
return ref.push(event);
}
})
);
return res.status(200).send("processed events");
} catch (error) {
console.error(error.message);
}
});
A quick overview of the changes -
Use await and async calls to wait for things to complete vs. being triggered in a callback (async and await are generally much easier to read than promises with .then functions as the execution order is the order of the code)
Used request-promise-native which supports promises / await (i.e. the await means wait until the promise returns so we need something that returns a promise)
Used const and let vs. var for variables; this improves the scope of variables
Instead of doing checks like if(is good) { do good things } use a if(isbad) { return some error} do good thin. This makes the code easier to read and prevents lots of nested ifs where you don't know where they end
Use a Promise.all() so retrieving the categories for each event is done in parallel
There are two main changes you should implement in your code:
Since request does not return a promise you need to use an interface wrapper for request, like request-promise in order to correctly chain the different asynchronous events (See Doug's comment to your question)
Since you will then call several times (in parallel) the different endpoints with request-promise you need to use Promise.all() in order to wait all the promises resolve before sending back the response. This is also the case for the different calls to the Firebase push() method.
Therefore, modifying your code along the following lines should work.
I let you modifying it in such a way you get the values of name and description used to construct the event object. The order of the items in the results array is exactly the same than the one of the promises one. So you should be able, knowing that, to get the values of name and description within results.forEach(groupBody => {}) e.g. by saving these values in a global array.
const functions = require('firebase-functions');
const admin = require('firebase-admin');
admin.initializeApp();
var rp = require('request-promise');
exports.foo = functions.https.onRequest((req, res) => {
var ref = admin.database().ref('/foo');
var options = {
url:
'https://api.meetup.com/2/open_events?sign=true&photo-host=public&lat=39.747988&lon=-104.994945&page=20&key=****',
json: true
};
rp(options)
.then(body => {
if ('results' in body) {
const promises = [];
for (var i = 0; i < body.results.length; i++) {
var result = body.results[i];
if (
'name' in result &&
'description' in result &&
'group' in result &&
'urlname' in result.group
) {
var groupOptions = {
url:
'https://api.meetup.com/' +
result.group.urlname +
'?sign=true&photo-host=public&key=****',
json: true
};
promises.push(rp(groupOptions));
}
}
return Promise.all(promises);
} else {
throw new Error('err xxxx');
}
})
.then(results => {
const promises = [];
results.forEach(groupBody => {
if ('category' in groupBody && 'name' in groupBody.category) {
var event = {
name: '....',
description: '...',
category: groupBody.category.name
};
promises.push(ref.push(event));
} else {
throw new Error('err xxxx');
}
});
return Promise.all(promises);
})
.then(() => {
res.send('processed events');
})
.catch(error => {
res.status(500).send(error);
});
});
I made some changes and got it working with Node 8. I added this to my package.json:
"engines": {
"node": "8"
}
And this is what the code looks like now, based on R. Wright's answer and some Firebase cloud function sample code.
const functions = require("firebase-functions");
const admin = require("firebase-admin");
admin.initializeApp();
const request = require("request-promise-native");
exports.foo = functions.https.onRequest(
async (req, res) => {
var ref = admin.database().ref("/foo");
var options = {
url: "https://api.meetup.com/2/open_events?sign=true&photo-host=public&lat=39.747988&lon=-104.994945&page=20&key=****",
json: true
};
await request(
options,
async (error, response, body) => {
if (error) {
console.error(JSON.stringify(error));
res.status(500).end();
} else if ("results" in body) {
for (var i = 0; i < body.results.length; i++) {
var result = body.results[i];
if ("name" in result &&
"description" in result &&
"group" in result &&
"urlname" in result.group
) {
var groupOptions = {
url: "https://api.meetup.com/" + result.group.urlname + "?sign=true&photo-host=public&key=****",
json: true
};
var groupBody = await request(groupOptions);
if ("category" in groupBody && "name" in groupBody.category) {
var event = {
name: result.name,
description: result.description,
category: groupBody.category.name
};
await ref.push(event);
}
}
}
res.status(200).send("processed events");
}
}
);
}
);
I'm trying to update a field timestamp with the Firestore admin timestamp in a collection with more than 500 docs.
const batch = db.batch();
const serverTimestamp = admin.firestore.FieldValue.serverTimestamp();
db
.collection('My Collection')
.get()
.then((docs) => {
serverTimestamp,
}, {
merge: true,
})
.then(() => res.send('All docs updated'))
.catch(console.error);
This throws an error
{ Error: 3 INVALID_ARGUMENT: cannot write more than 500 entities in a single call
at Object.exports.createStatusError (C:\Users\Growthfile\Desktop\cf-test\functions\node_modules\grpc\src\common.js:87:15)
at Object.onReceiveStatus (C:\Users\Growthfile\Desktop\cf-test\functions\node_modules\grpc\src\client_interceptors.js:1188:28)
at InterceptingListener._callNext (C:\Users\Growthfile\Desktop\cf-test\functions\node_modules\grpc\src\client_interceptors.js:564:42)
at InterceptingListener.onReceiveStatus (C:\Users\Growthfile\Desktop\cf-test\functions\node_modules\grpc\src\client_interceptors.js:614:8)
at callback (C:\Users\Growthfile\Desktop\cf-test\functions\node_modules\grpc\src\client_interceptors.js:841:24)
code: 3,
metadata: Metadata { _internal_repr: {} },
details: 'cannot write more than 500 entities in a single call' }
Is there a way that I can write a recursive method which creates a batch object updating a batch of 500 docs one by one until all the docs are updated.
From the docs I know that delete operation is possible with the recursive approach as mentioned here:
https://firebase.google.com/docs/firestore/manage-data/delete-data#collections
But, for updating, I'm not sure how to end the execution since the docs are not being deleted.
I also ran into the problem to update more than 500 documents inside a Firestore collection. And i would like to share how i solved this problem.
I use cloud functions to update my collection inside Firestore but this should also work on client side code.
The solution counts every operation which is made to the batch and after the limit is reached a new batch is created and pushed to the batchArray.
After all updates are completed the code loops through the batchArray and commits every batch which is inside the array.
It is important to count every operation set(), update(), delete() which is made to the batch because they all count to the 500 operation limit.
const documentSnapshotArray = await firestore.collection('my-collection').get();
const batchArray = [];
batchArray.push(firestore.batch());
let operationCounter = 0;
let batchIndex = 0;
documentSnapshotArray.forEach(documentSnapshot => {
const documentData = documentSnapshot.data();
// update document data here...
batchArray[batchIndex].update(documentSnapshot.ref, documentData);
operationCounter++;
if (operationCounter === 499) {
batchArray.push(firestore.batch());
batchIndex++;
operationCounter = 0;
}
});
batchArray.forEach(async batch => await batch.commit());
return;
I liked this simple solution:
const users = await db.collection('users').get()
const batches = _.chunk(users.docs, 500).map(userDocs => {
const batch = db.batch()
userDocs.forEach(doc => {
batch.set(doc.ref, { field: 'myNewValue' }, { merge: true })
})
return batch.commit()
})
await Promise.all(batches)
Just remember to add import * as _ from "lodash" at the top. Based on this answer.
You can use default BulkWriter. This method used 500/50/5 rule.
Example:
let bulkWriter = firestore.bulkWriter();
bulkWriter.create(documentRef, {foo: 'bar'});
bulkWriter.update(documentRef2, {foo: 'bar'});
bulkWriter.delete(documentRef3);
await close().then(() => {
console.log('Executed all writes');
});
As mentioned above, #Sebastian's answer is good and I upvoted that too. Although faced an issue while updating 25000+ documents in one go.
The tweak to logic is as below.
console.log(`Updating documents...`);
let collectionRef = db.collection('cities');
try {
let batch = db.batch();
const documentSnapshotArray = await collectionRef.get();
const records = documentSnapshotArray.docs;
const index = documentSnapshotArray.size;
console.log(`TOTAL SIZE=====${index}`);
for (let i=0; i < index; i++) {
const docRef = records[i].ref;
// YOUR UPDATES
batch.update(docRef, {isDeleted: false});
if ((i + 1) % 499 === 0) {
await batch.commit();
batch = db.batch();
}
}
// For committing final batch
if (!(index % 499) == 0) {
await batch.commit();
}
console.log('write completed');
} catch (error) {
console.error(`updateWorkers() errored out : ${error.stack}`);
reject(error);
}
Explanations given on previous comments already explain the issue.
I'm sharing the final code that I built and worked for me, since I needed something that worked in a more decoupled manner, instead of the way that most of the solutions presented above do.
import { FireDb } from "#services/firebase"; // = firebase.firestore();
type TDocRef = FirebaseFirestore.DocumentReference;
type TDocData = FirebaseFirestore.DocumentData;
let fireBatches = [FireDb.batch()];
let batchSizes = [0];
let batchIdxToUse = 0;
export default class FirebaseUtil {
static addBatchOperation(
operation: "create",
ref: TDocRef,
data: TDocData
): void;
static addBatchOperation(
operation: "update",
ref: TDocRef,
data: TDocData,
precondition?: FirebaseFirestore.Precondition
): void;
static addBatchOperation(
operation: "set",
ref: TDocRef,
data: TDocData,
setOpts?: FirebaseFirestore.SetOptions
): void;
static addBatchOperation(
operation: "create" | "update" | "set",
ref: TDocRef,
data: TDocData,
opts?: FirebaseFirestore.Precondition | FirebaseFirestore.SetOptions
): void {
// Lines below make sure we stay below the limit of 500 writes per
// batch
if (batchSizes[batchIdxToUse] === 500) {
fireBatches.push(FireDb.batch());
batchSizes.push(0);
batchIdxToUse++;
}
batchSizes[batchIdxToUse]++;
const batchArgs: [TDocRef, TDocData] = [ref, data];
if (opts) batchArgs.push(opts);
switch (operation) {
// Specific case for "set" is required because of some weird TS
// glitch that doesn't allow me to use the arg "operation" to
// call the function
case "set":
fireBatches[batchIdxToUse].set(...batchArgs);
break;
default:
fireBatches[batchIdxToUse][operation](...batchArgs);
break;
}
}
public static async runBatchOperations() {
// The lines below clear the globally available batches so we
// don't run them twice if we call this function more than once
const currentBatches = [...fireBatches];
fireBatches = [FireDb.batch()];
batchSizes = [0];
batchIdxToUse = 0;
await Promise.all(currentBatches.map((batch) => batch.commit()));
}
}
Based on all the above answers, I put together the following pieces of code that one can put into a module in JavaScript back-end and front-end to easily use Firestore batch writes, without worrying about the 500 writes limit.
Back-end (Node.js)
// The Firebase Admin SDK to access Firestore.
const admin = require("firebase-admin");
admin.initializeApp();
// Firestore does not accept more than 500 writes in a transaction or batch write.
const MAX_TRANSACTION_WRITES = 499;
const isFirestoreDeadlineError = (err) => {
console.log({ err });
const errString = err.toString();
return (
errString.includes("Error: 13 INTERNAL: Received RST_STREAM") ||
errString.includes("Error: 4 DEADLINE_EXCEEDED: Deadline exceeded")
);
};
const db = admin.firestore();
// How many transactions/batchWrites out of 500 so far.
// I wrote the following functions to easily use batchWrites wthout worrying about the 500 limit.
let writeCounts = 0;
let batchIndex = 0;
let batchArray = [db.batch()];
// Commit and reset batchWrites and the counter.
const makeCommitBatch = async () => {
console.log("makeCommitBatch");
await Promise.all(batchArray.map((bch) => bch.commit()));
};
// Commit the batchWrite; if you got a Firestore Deadline Error try again every 4 seconds until it gets resolved.
const commitBatch = async () => {
try {
await makeCommitBatch();
} catch (err) {
console.log({ err });
if (isFirestoreDeadlineError(err)) {
const theInterval = setInterval(async () => {
try {
await makeCommitBatch();
clearInterval(theInterval);
} catch (err) {
console.log({ err });
if (!isFirestoreDeadlineError(err)) {
clearInterval(theInterval);
throw err;
}
}
}, 4000);
}
}
};
// If the batchWrite exeeds 499 possible writes, commit and rest the batch object and the counter.
const checkRestartBatchWriteCounts = () => {
writeCounts += 1;
if (writeCounts >= MAX_TRANSACTION_WRITES) {
batchIndex++;
batchArray.push(db.batch());
writeCounts = 0;
}
};
const batchSet = (docRef, docData) => {
batchArray[batchIndex].set(docRef, docData);
checkRestartBatchWriteCounts();
};
const batchUpdate = (docRef, docData) => {
batchArray[batchIndex].update(docRef, docData);
checkRestartBatchWriteCounts();
};
const batchDelete = (docRef) => {
batchArray[batchIndex].delete(docRef);
checkRestartBatchWriteCounts();
};
module.exports = {
admin,
db,
MAX_TRANSACTION_WRITES,
checkRestartBatchWriteCounts,
commitBatch,
isFirestoreDeadlineError,
batchSet,
batchUpdate,
batchDelete,
};
Front-end
// Firestore does not accept more than 500 writes in a transaction or batch write.
const MAX_TRANSACTION_WRITES = 499;
const isFirestoreDeadlineError = (err) => {
return (
err.message.includes("DEADLINE_EXCEEDED") ||
err.message.includes("Received RST_STREAM")
);
};
class Firebase {
constructor(fireConfig, instanceName) {
let app = fbApp;
if (instanceName) {
app = app.initializeApp(fireConfig, instanceName);
} else {
app.initializeApp(fireConfig);
}
this.name = app.name;
this.db = app.firestore();
this.firestore = app.firestore;
// How many transactions/batchWrites out of 500 so far.
// I wrote the following functions to easily use batchWrites wthout worrying about the 500 limit.
this.writeCounts = 0;
this.batch = this.db.batch();
this.isCommitting = false;
}
async makeCommitBatch() {
console.log("makeCommitBatch");
if (!this.isCommitting) {
this.isCommitting = true;
await this.batch.commit();
this.writeCounts = 0;
this.batch = this.db.batch();
this.isCommitting = false;
} else {
const batchWaitInterval = setInterval(async () => {
if (!this.isCommitting) {
this.isCommitting = true;
await this.batch.commit();
this.writeCounts = 0;
this.batch = this.db.batch();
this.isCommitting = false;
clearInterval(batchWaitInterval);
}
}, 400);
}
}
async commitBatch() {
try {
await this.makeCommitBatch();
} catch (err) {
console.log({ err });
if (isFirestoreDeadlineError(err)) {
const theInterval = setInterval(async () => {
try {
await this.makeCommitBatch();
clearInterval(theInterval);
} catch (err) {
console.log({ err });
if (!isFirestoreDeadlineError(err)) {
clearInterval(theInterval);
throw err;
}
}
}, 4000);
}
}
}
async checkRestartBatchWriteCounts() {
this.writeCounts += 1;
if (this.writeCounts >= MAX_TRANSACTION_WRITES) {
await this.commitBatch();
}
}
async batchSet(docRef, docData) {
if (!this.isCommitting) {
this.batch.set(docRef, docData);
await this.checkRestartBatchWriteCounts();
} else {
const batchWaitInterval = setInterval(async () => {
if (!this.isCommitting) {
this.batch.set(docRef, docData);
await this.checkRestartBatchWriteCounts();
clearInterval(batchWaitInterval);
}
}, 400);
}
}
async batchUpdate(docRef, docData) {
if (!this.isCommitting) {
this.batch.update(docRef, docData);
await this.checkRestartBatchWriteCounts();
} else {
const batchWaitInterval = setInterval(async () => {
if (!this.isCommitting) {
this.batch.update(docRef, docData);
await this.checkRestartBatchWriteCounts();
clearInterval(batchWaitInterval);
}
}, 400);
}
}
async batchDelete(docRef) {
if (!this.isCommitting) {
this.batch.delete(docRef);
await this.checkRestartBatchWriteCounts();
} else {
const batchWaitInterval = setInterval(async () => {
if (!this.isCommitting) {
this.batch.delete(docRef);
await this.checkRestartBatchWriteCounts();
clearInterval(batchWaitInterval);
}
}, 400);
}
}
}
No citations or documentation, this code i invented by myself and for me it worked and looks clean, and simple for read and usage. If some one like it, then can use it too.
Better make autotest becose code use private var _ops wich can be changed after packages upgrade. Forexample in old versions its can be _mutations
async function commitBatch(batch) {
const MAX_OPERATIONS_PER_COMMIT = 500;
while (batch._ops.length > MAX_OPERATIONS_PER_COMMIT) {
const batchPart = admin.firestore().batch();
batchPart._ops = batch._ops.splice(0, MAX_OPERATIONS_PER_COMMIT - 1);
await batchPart.commit();
}
await batch.commit();
}
Usage:
const batch = admin.firestore().batch();
batch.delete(someRef);
batch.update(someRef);
...
await commitBatch(batch);
Simple solution
Just fire twice ?
my array is "resultsFinal"
I fire batch once with a limit of 490 , and second with a limit of the lenght of the array ( results.lenght)
Works fine for me :)
How you check it ?
You go to firebase and delete your collection , firebase say you have delete XXX docs , same as the lenght of your array ? Ok so you are good to go
async function quickstart(results) {
// we get results in parameter for get the data inside quickstart function
const resultsFinal = results;
// console.log(resultsFinal.length);
let batch = firestore.batch();
// limit of firebase is 500 requests per transaction/batch/send
for (i = 0; i < 490; i++) {
const doc = firestore.collection('testMore490').doc();
const object = resultsFinal[i];
batch.set(doc, object);
}
await batch.commit();
// const batchTwo = firestore.batch();
batch = firestore.batch();
for (i = 491; i < 776; i++) {
const objectPartTwo = resultsFinal[i];
const doc = firestore.collection('testMore490').doc();
batch.set(doc, objectPartTwo);
}
await batch.commit();
}
I am working on a Resource Manager and some jobs must be atomic, because there may be race condition.
I see a solution is provided in a similar post
Javascript semaphore / test-and-set / lock?
which says implement a basic integer semaphore, just add the variable into the DOM and lock/unlock it and make sure your functions keep checking it, else timeout them
Can someone help me how can I implement it for below code ? and How can I add a variable to DOM ?
In the code below retrievePartners methods returns partners based on what user asked in capabilities object, and retrievePartners method may have been called same time from another user, asking for same partners (browsers.) So this method should be atomic, means should deal only with one capabilities at a same time.
async retrievePartners (capabilities) {
const appropriatePartners = { chrome: [], firefox: [], safari: [], ie: [] }
const partners = await this.getAllPartners()
allTypeNumber = 0
// first check if there is available appropriate Partners
Object.keys(capabilities.type).forEach(key => {
let typeNumber = parseInt(capabilities.type[key])
allTypeNumber = allTypeNumber + typeNumber
for (let i = 0; i < typeNumber; i++) {
partners.forEach((partner, i) => {
if (
key === partner.value.type &&
partner.value.isAvailable &&
appropriatePartners[key].length < typeNumber
) {
appropriatePartners[key].push(partner)
}
})
if (appropriatePartners[key].length < typeNumber) {
throw new Error(
'Sorry there are no appropriate Partners for this session'
)
}
}
})
if (allTypeNumber === 0) {
throw new Error('Please mention at least 1 type of browser !')
} else {
Object.keys(appropriatePartners).forEach(key => {
appropriatePartners[key].forEach(partner => {
this.instructorPeer.set('/partners/' + partner.id + '/states/', {
isAvailable: false
})
})
})
return appropriatePartners
}
}
getAllPartners method
async getAllPartners (capabilities) {
const partners = []
const paths = await this.instructorPeer.get({
path: { startsWith: '/partners/' }
})
paths.forEach((path, i) => {
if (path.fetchOnly) {
let obj = {}
obj.value = path.value
obj.id = path.path.split('/partners/')[1]
obj.value.isAvailable = paths[i + 1].value.isAvailable
partners.push(obj)
}
})
return partners
}
Here is how I call the method
async function startTest () {
const capabilities = {
type: {
chrome: 1
}
}
test('Example test', async t => {
try {
session = await workout.createSession(capabilities)
const partners = session.partners
const partner = partners.chrome[0]
...
I'm using two chained firebase requests in my code. the first one is fetching all the reviews, then I iterate trough all the results, firing a second firebase request for each element in loop.
After the loop, I've updated the state with the new data using setState. And that update is making my GUI transparent, like so:
the bottom part is randomly transparent each time, sometimes the render is visible a bit, sometimes not at all. When I remove the setState block, everything is fine.
The code in question:
getReviews() {
let reviewData = {}
let vendorId = this.props.item.vendor_id
let product = this.props.item.key
firebase.database().ref('u_products/' + vendorId + '/' + product + '/reviews/').once('value', (data) => {
reviewData = data.val()
if (!reviewData) {
this.setState({
loadedReviews: true
})
return
} else {
for (var key in reviewData) {
firebase.database().ref('users/' + reviewData[key].poster_uid + '/').once('value', (data) => {
let userData = data.val()
if (userData) {
this.getUserImageUrl()
...
}
})
}
this.state.reviewData = reviewData
this.setState({
dataSource: this.state.dataSource.cloneWithRows(reviewData),
loadedReviews: true,
})
}
})
}
Intended behaviour is first firebase request -> second one iterating for all the results from the first request ->setState.
Does anyone else have this issue?
firebase's .once() returns a Promise, so what you need to do is create an array of Promises, then call Promise.all(array).then((arrayOfData) => { ... });
You can now process the resulting array, then call .setState()
Here's a mockup:
/* firebase mockup */
function Result(url) {
this.url = url;
this.once = function(param, callback) {
return new Promise((resolve, reject) => {
setTimeout(() => {
var data = {
val: () => {
return this.url + " query result";
}
};
resolve(data);
}, Math.random() * 500 + 1000);
});
};
}
var firebase = {
database: function() {
return firebase;
},
ref: function(url) {
return new Result(url);
}
};
var reviewData = {
"test": {
poster_uid: "testData"
},
"2nd": {
poster_uid: "2ndData"
}
};
// ACTUAL IMPORTANT CODE BELOW #################
var arrayOfPromises = [];
for (var key in reviewData) {
arrayOfPromises.push(firebase.database().ref('users/' + reviewData[key].poster_uid + '/').once('value'));
}
Promise.all(arrayOfPromises).then(function(arrayOfResults) {
arrayOfResults.map((data, i) => {
let userData = data.val();
if (userData) {
console.log(i, userData);
}
});
console.log("setting state now");
});