I have a function that handles more than 1400 crypto pairs: I have to send an API request for each pair and store the resulting trades. Each pair takes 3-4 seconds, so the whole function takes a very long time. I am getting the pairs from my DB and I am storing the trade data in my DB as well. I need to process the pairs in parallel so that trades for the pairs at the beginning of the list aren't missed while the function is still working through the rest.
This is my current function:
const getTrades = async () => {
  let page = 1;
  const results = await db.query("SELECT * FROM pairs;");
  const pairs = results.rows;
  const latest = await db.query("SELECT MAX(trade_time) FROM trades");
  const latestTrade = latest.rows[0].max;
  const coinResult = await db.query("SELECT * FROM coins");
  let coinsInfo = coinResult.rows;
  coinsInfo = coinsInfo.flat();
  for (const pair of pairs) {
    let biggestTrade = [];
    const response = await axios.get(
      `https://api.binance.com/api/v3/trades?symbol=${pair.pair}`
    );
    let filtered = response.data;
    filtered = filtered.filter((trade) => trade.time > latestTrade);
    let sells = filtered.filter((trade) => trade.isBuyerMaker === true);
    let buys = filtered.filter((trade) => trade.isBuyerMaker === false);
    if (sells.length > 0) {
      biggestTrade.push(
        sells.reduce(function (prev, current) {
          return prev.quoteQty > current.quoteQty ? prev : current;
        })
      );
    }
    if (buys.length > 0) {
      biggestTrade.push(
        buys.reduce(function (prev, current) {
          return prev.quoteQty > current.quoteQty ? prev : current;
        })
      );
    }
    biggestTrade = biggestTrade.flat();
    for (const trade of filtered) {
      let priceUSD = 0;
      let baseAssetIcon = "null";
      for (const coin of coinsInfo) {
        if (coin.symbol.toUpperCase() === pair.quote_asset) {
          priceUSD = coin.current_price;
        }
        if (coin.symbol.toUpperCase() === pair.base_asset) {
          baseAssetIcon = coin.image_url;
        }
        if (priceUSD > 0 && baseAssetIcon != "null") {
          break;
        }
      }
      if (trade.quoteQty * priceUSD > 50000) {
        const results = db.query(
          "INSERT INTO trades (exchange_name, exchange_icon_url, trade_time, price_in_quote_asset, price_in_usd, trade_value, base_asset_icon, qty, quoteQty, is_buyer_maker, pair, base_asset_trade, quote_asset_trade) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13)",
          [
            "Binance",
            "https://assets.coingecko.com/markets/images/52/small/binance.jpg?1519353250",
            trade.time,
            trade.price,
            priceUSD,
            trade.quoteQty * priceUSD,
            baseAssetIcon,
            trade.qty,
            trade.quoteQty,
            trade.isBuyerMaker,
            pair.pair,
            pair.base_asset,
            pair.quote_asset,
          ]
        );
        console.log("TRADE ADDED");
      }
    }
  }
  console.log("PAIRS ARE OVER");
};
pairs has over 1400 entries and is the array we are looping through.
It depends on how many servers you are running this function on.
If it's a single machine, use worker_threads: run the same function in separate threads to achieve parallelism. But to be honest, 1400 pairs at 3-4 seconds each is a lot, roughly 1-2 hours per run in serial. Depending on your machine, if you have 8 cores it might cut the time eightfold, which still leaves you around 10 minutes, and cloud services usually charge a lot more for instances with more CPU cores.
If it's multiple machines, use a master and a queue to push pairs to each worker machine, and on each worker machine you can also spawn multiple threads. That way you can scale horizontally, and it's possible to finish the run in seconds. In this setup each machine can be one of the cheap instances from a cloud provider.
So it depends on your requirements: if you want it super fast, you have to add more machines.
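A minimal single-machine sketch of the worker_threads approach, with illustrative names that are not from the question (tradeWorker.js and processPair stand for code you would extract from the loop body of getTrades):

// main thread — split the pairs into one chunk per CPU core
const os = require("os");
const path = require("path");
const { Worker } = require("worker_threads");

async function getTradesParallel(pairs) {
  const cores = os.cpus().length;
  const chunkSize = Math.ceil(pairs.length / cores);
  const jobs = [];
  for (let i = 0; i < pairs.length; i += chunkSize) {
    const chunk = pairs.slice(i, i + chunkSize);
    jobs.push(new Promise((resolve, reject) => {
      // each thread owns its own slice of the pairs
      const worker = new Worker(path.join(__dirname, "tradeWorker.js"), { workerData: chunk });
      worker.on("message", resolve); // the worker reports when its chunk is done
      worker.on("error", reject);
    }));
  }
  return Promise.all(jobs);
}

and the worker side (tradeWorker.js):

const { parentPort, workerData } = require("worker_threads");
const processPair = require("./processPair"); // hypothetical: the fetch + insert body of getTrades
(async () => {
  for (const pair of workerData) {
    await processPair(pair); // serial within the thread, parallel across threads
  }
  parentPort.postMessage("done");
})();

Note that since the per-pair work is network- and DB-bound rather than CPU-bound, simply firing the axios calls in concurrent batches with Promise.all (no threads at all) may already get you most of the win.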
(Closed as a duplicate of: How do I debug error ECONNRESET in Node.js?)
I am using a Node server to get trade data from Binance. There are over a thousand pairs against which trades need to be fetched, so the function takes a long time to run completely. I need the function to restart whenever it finishes so that I keep getting new data while my server is live and running. However, after the server has been running for 10-15 minutes, it fails with the ECONNRESET error referenced above.
I want the server to run permanently in the background, with this function continuously fetching trades from the API and storing them in my DB. I have another GET method defined that then fetches the trades from my DB.
The function that I am trying to run permanently lies in my main server.js file:
const getTrades = async () => {
  let page = 1;
  let coins = [];
  const results = await db.query("SELECT * FROM pairs;");
  const pairs = results.rows;
  const latest = await db.query("SELECT MAX(trade_time) FROM trades");
  const latestTrade = latest.rows[0].max;
  while (page < 55) {
    const gecko = await axios(
      `https://api.coingecko.com/api/v3/coins/markets?vs_currency=USD&order=market_cap_desc&per_page=250&page=${page}`
    );
    coins.push(gecko.data);
    page++;
  }
  console.log("Loop over");
  coins = coins.flat();
  for (const pair of pairs) {
    let biggestTrade = [];
    const response = await axios.get(
      `https://api.binance.com/api/v3/trades?symbol=${pair.pair}`
    );
    let filtered = response.data;
    filtered = filtered.filter((trade) => trade.time > latestTrade);
    let sells = filtered.filter((trade) => trade.isBuyerMaker === true);
    let buys = filtered.filter((trade) => trade.isBuyerMaker === false);
    if (sells.length > 0) {
      biggestTrade.push(
        sells.reduce(function (prev, current) {
          return prev.quoteQty > current.quoteQty ? prev : current;
        })
      );
    }
    if (buys.length > 0) {
      biggestTrade.push(
        buys.reduce(function (prev, current) {
          return prev.quoteQty > current.quoteQty ? prev : current;
        })
      );
    }
    biggestTrade = biggestTrade.flat();
    for (const trade of biggestTrade) {
      let priceUSD = 0;
      let baseAssetIcon = "";
      for (const coin of coins) {
        if (coin.symbol.toUpperCase() === pair.quote_asset) {
          priceUSD = coin.current_price;
        }
        if (coin.symbol.toUpperCase() === pair.base_asset) {
          baseAssetIcon = coin.image;
        }
        if (priceUSD > 0 && baseAssetIcon.length > 0) {
          break;
        }
      }
      if (trade.quoteQty * priceUSD > 50000) {
        const results = db.query(
          "INSERT INTO trades (exchange_name, exchange_icon_url, trade_time, price_in_quote_asset, price_in_usd, trade_value, base_asset_icon, qty, quoteQty, is_buyer_maker, pair, base_asset_trade, quote_asset_trade) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13)",
          [
            "Binance",
            "https://assets.coingecko.com/markets/images/52/small/binance.jpg?1519353250",
            trade.time,
            trade.price,
            priceUSD,
            trade.quoteQty * priceUSD,
            baseAssetIcon,
            trade.qty,
            trade.quoteQty,
            trade.isBuyerMaker,
            pair.pair,
            pair.base_asset,
            pair.quote_asset,
          ]
        );
        console.log("Trade Added");
      }
    }
  }
  console.log("PAIRS ARE OVER");
};
How can I make the function run repeatedly after a specified time period without the server breaking?
If you make continuous calls to an external third-party API without any interval between them, you are asking to be disconnected, because APIs have security policies that block this kind of client. Imagine the entire world making 55 connections at once to a server; the server would certainly be overwhelmed.
I see in your code that you are making 55 calls back to back. I recommend you put a delay between the calls.
const delay = time => new Promise(res => setTimeout(res,time));
await delay(1000); // 1 second delay
There are other optimizations that can prevent connection issues in axios, like sharing an httpAgent:
import http from "http"
import https from "https"
import axios from "axios"
const httpAgent = new http.Agent({ keepAlive: true })
const httpsAgent = new https.Agent({ keepAlive: true })
const api = axios.create({
  baseURL: "http://google.com",
  httpAgent,
  httpsAgent,
})
//now you will reuse the axios instance:
while (page < 55) {
  await delay(500);
  gecko = await api(
    `https://api.coingecko.com/api/v3/coins/markets?vs_currency=USD&order=market_cap_desc&per_page=250&page=${page}`
  );
  page++; // advance the page, or this loop never ends
}
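To address the restart requirement, a common pattern is a self-scheduling loop that only queues the next run after the previous one finishes, so runs never overlap. A minimal sketch, assuming the getTrades function from the question (the interval is an arbitrary choice):

const RUN_INTERVAL_MS = 60000; // assumption: one minute between runs

const runForever = async () => {
  try {
    await getTrades();
  } catch (err) {
    // log and carry on so one ECONNRESET doesn't take the server down
    console.error("getTrades failed:", err.message);
  }
  setTimeout(runForever, RUN_INTERVAL_MS); // schedule the next run
};

runForever();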
I am trying to create a script that pulls from the CoinMarketCap API and displays the current price. The script works fine on the back end when I assign the variable a value. However, when I try to run the function in Sheets, the returned value is null.
function marketview(ticker) {
  var url = "https://pro-api.coinmarketcap.com/v1/cryptocurrency/quotes/latest?CMC_PRO_API_KEY=XXX&symbol=" + ticker;
  var data = UrlFetchApp.fetch(url);
  const jsondata = JSON.parse(data);
  Logger.log(jsondata.data[ticker].quote['USD'].price)
}
My execution logs show that the script is running, but when I use the function and try to quote ETH, for example, the script runs for BTC.
When I do this on the back end and assign ETH, the script works fine and returns the right quote. Any ideas on what I'm missing?
I did the same with the CoinGecko API and had an issue with all my requests being rejected with a quota-exceeded error.
I understood that Google Sheets' server IP addresses were already spamming the CoinGecko server (I was obviously not the only one trying this).
This is why I used an external service like apify.com to pull the data and re-expose it over their API.
This is my Apps Script coingecko.gs:
/**
 * get latest coingecko market prices dataset
 */
async function GET_COINGECKO_PRICES(key, actor) {
  const coinGeckoUrl = `https://api.apify.com/v2/acts/${actor}/runs/last/dataset/items?token=${key}&status=SUCCEEDED`
  return ImportJSON(coinGeckoUrl);
}
You need the ImportJSON function, available here: https://github.com/bradjasper/ImportJSON/blob/master/ImportJSON.gs
Then in a cell I write =GET_COINGECKO_PRICES(APIFY_API_KEY,APIFY_COINGECKO_MARKET_PRICES); you will have to define the two names APIFY_API_KEY and APIFY_COINGECKO_MARKET_PRICES for this to work.
Then register on apify.com; you'll have to create an actor by forking the apify-webscraper actor.
I set the Start URLs to https://api.coingecko.com/api/v3/coins/list, which gives me the total number of existing cryptos (approx. 11000 as of today) and therefore the number of pages, so I can run the requests concurrently (the rate limit is 10 concurrent requests on CoinGecko). Then I just replace /list with /markets and set the proper limit to get all the pages I need.
I use the following for the task's page function:
async function pageFunction(context) {
  let marketPrices = [];
  const ENABLE_CONCURRENCY_BATCH = true;
  const PRICE_CHANGE_PERCENTAGE = ['1h', '24h', '7d'];
  const MAX_PAGE_TO_SCRAP = 10;
  const MAX_PER_PAGE = 250;
  const MAX_CONCURRENCY_BATCH_LIMIT = 10;
  await context.waitFor(5000);
  const cryptoList = readJson();
  const totalPage = Math.ceil(cryptoList.length / MAX_PER_PAGE);
  context.log.info(`[Coingecko total cryptos count: ${cryptoList.length} (${totalPage} pages)]`)

  function readJson() {
    try {
      const preEl = document.querySelector('body > pre');
      return JSON.parse(preEl.innerText);
    } catch (error) {
      throw Error(`Failed to read JSON: ${error.message}`)
    }
  }

  async function loadPage($page) {
    try {
      const params = {
        vs_currency: 'usd',
        page: $page,
        per_page: MAX_PER_PAGE,
        price_change_percentage: PRICE_CHANGE_PERCENTAGE.join(','),
        sparkline: true,
      }
      let pageUrl = `${context.request.url.replace(/\/list$/, '/markets')}?`;
      pageUrl += [
        `vs_currency=${params.vs_currency}`,
        `page=${params.page}`,
        `per_page=${params.per_page}`,
        `price_change_percentage=${params.price_change_percentage}`,
      ].join('&');
      context.log.info(`GET page ${params.page} URL: ${pageUrl}`);
      const page = await fetch(pageUrl).then((response) => response.json());
      context.log.info(`Done GET page ${params.page} size ${page.length}`);
      marketPrices = [...marketPrices, ...page];
      return page
    } catch (error) {
      throw Error(`Fail to load page ${$page}: ${error.message}`)
    }
  }

  try {
    if (ENABLE_CONCURRENCY_BATCH) {
      const fetchers = Array.from({ length: totalPage }).map((_, i) => {
        const pageIndex = i + 1;
        if (pageIndex > MAX_PAGE_TO_SCRAP) {
          return null;
        }
        return () => loadPage(pageIndex);
      }).filter(Boolean);
      while (fetchers.length) {
        await Promise.all(
          fetchers.splice(0, MAX_CONCURRENCY_BATCH_LIMIT).map((f) => f())
        );
      }
    } else {
      let pageIndex = 1
      let page = await loadPage(pageIndex)
      // compare the page *index* against the cap; `page` itself is an array
      while (page.length !== 0 && pageIndex < MAX_PAGE_TO_SCRAP) {
        pageIndex += 1
        page = await loadPage(pageIndex)
      }
    }
  } catch (error) {
    context.log.info(`Fetchers failed: ${error.message}`);
  }

  context.log.info(`End: Updated ${marketPrices.length} prices for ${cryptoList.length} cryptos`);
  const data = marketPrices.sort((a, b) => a.id.toLowerCase() > b.id.toLowerCase() ? 1 : -1);
  context.log.info(JSON.stringify(data.find((item) => item.id.toLowerCase() === 'bitcoin')));

  function sanitizer(item) {
    item.symbol = item.symbol.toUpperCase()
    return item;
  }
  return data.map(sanitizer)
}
I presume you are hitting the same issue with CoinMarketCap that I had with CoinGecko, and that you could do the same with it.
You're not returning anything to the sheet, just logging it. Return it:
return jsondata.data[ticker].quote['USD'].price
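So the fixed custom function, unchanged from the question apart from the return, would be:

function marketview(ticker) {
  var url = "https://pro-api.coinmarketcap.com/v1/cryptocurrency/quotes/latest?CMC_PRO_API_KEY=XXX&symbol=" + ticker;
  var data = UrlFetchApp.fetch(url);
  const jsondata = JSON.parse(data);
  return jsondata.data[ticker].quote['USD'].price; // the returned value lands in the cell
}

Entering =marketview("ETH") in a cell should then display the ETH quote.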
I have a cloud function that is triggered when a sale/purchase is committed into Firestore. This function's purpose is to update the inventory level centrally.
The function works just fine if I'm updating an item's inventory at only one warehouse, but doing so for multiple warehouses shows unexpected behavior. I'm looping through all the affected warehouses to calculate the total inventory-level changes, and every iteration kicks off a JavaScript promise.
The problem seems to be in how the promises are invoked. E.g., if I want to update 3 warehouses and loop 3 times, somehow 5 promises are kicked off; this is visible in the logs. I've researched similar questions here, but the solutions were suggested while Firestore was still in beta and might not be the right way forward. (Firestore transactions getting triggered multiple times resulting in wrong data)
Here is the code:
export const onTransactionCreate = functions.firestore
  .document('/companies/{companyId}/sub_transactions/{transId}')
  .onCreate(async (snapshot, context) => {
    const companyId = context.params.companyId // was missing; it is used below
    const transId = context.params.transId
    const stock_transaction: IStockTransaction = <IStockTransaction>snapshot.data()
    const trans_type: TRANS_TYPE = stock_transaction.trans_type
    const promises: any[] = []
    stock_transaction.lineItems.forEach((element, index) => {
      const ITEM_GUID = element.item_guid
      const is_increasing = isIncreasingTransaction(element.line_trans_type)
      const delta_stock = element.qty_transaction * (is_increasing ? 1 : -1)
      const TARGET_BRANCH_ID = element.target_branch_guid
      const itemRef = db.collection(FIRESTORE_PATHS.COL_COMPANIES).doc(companyId)
        .collection(FIRESTORE_PATHS.SUB_COMPANIES_ITEMS)
        .doc("" + ITEM_GUID)
      const item_promise = db.runTransaction(async t => {
        try {
          const item_doc = await t.get(itemRef)
          const item_branch_quantities: IBranchQuantity[] = (item_doc.data()!.branch_quantities || new Array())
          const item_branch_ids: string[] = (item_doc.data()!.available_branch_ids || new Array())
          const branch_index = item_branch_ids.indexOf(TARGET_BRANCH_ID)
          console.log(`${transId} Line Item ${index}, after document.get(), search branch index: ${branch_index}`)
          if (branch_index !== -1) {
            const prev_qty = item_branch_quantities[branch_index]
            const updated_qty = prev_qty.quantity + delta_stock
            item_branch_quantities[branch_index] = {
              item_guid: prev_qty.item_guid,
              branch_guid: prev_qty.branch_guid,
              quantity: updated_qty
            }
            console.log(`${transId} Line Item ${index} Updating qty # item ${delta_stock}, prev qty ${prev_qty.quantity}`)
          } else {
            item_branch_ids.push(TARGET_BRANCH_ID)
            item_branch_quantities.push({
              item_guid: element.item_guid,
              branch_guid: TARGET_BRANCH_ID,
              quantity: delta_stock
            })
            console.log(`${transId} Line Item ${index} Adding qty # item ${delta_stock}`)
          }
          t.update(itemRef, {
            branch_quantities: item_branch_quantities,
            available_branch_ids: item_branch_ids
          })
        } catch (err) {
          throw new Error(err)
        }
      })
      promises.push(item_promise)
    });
    return Promise.all(promises)
  })
We found the solution by reading this article:
A transaction consists of any number of get() operations followed by any number of write operations such as set(), update(), or delete(). In the case of a concurrent edit, Cloud Firestore runs the entire transaction again. For example, if a transaction reads documents and another client modifies any of those documents, Cloud Firestore retries the transaction. This feature ensures that the transaction runs on up-to-date and consistent data.
// inside the onCreate handler; item_update_transactions and
// current_branch_id come from the surrounding scope
lineItems.forEach(element => {
  const delta_transaction = element.qty * (isLineTransIncrease(element.line_trans_type) ? 1 : -1)
  const itemRef = db.collection('companies').doc(companyId).collection('sub_items').doc("" + element.item_guid)
  const p = db.runTransaction(t => {
    return t.get(itemRef)
      .then(doc => {
        let item_branch_quantities: IBranchQuantity[] = doc.data()!.branch_quantities
        let item_branch_ids: string[] = doc.data()!.available_branch_ids
        if (!item_branch_quantities)
          item_branch_quantities = new Array()
        if (!item_branch_ids)
          item_branch_ids = new Array()
        const branch_index = item_branch_ids.indexOf(current_branch_id)
        if (branch_index !== -1) {
          const prev_qty = item_branch_quantities[branch_index]
          const updated_qty: number = prev_qty.quantity + delta_transaction
          item_branch_quantities[branch_index] = {
            item_guid: prev_qty.item_guid,
            branch_guid: prev_qty.branch_guid,
            quantity: updated_qty
          }
        } else {
          item_branch_ids.push(current_branch_id)
          item_branch_quantities.push({
            item_guid: element.item_guid,
            branch_guid: current_branch_id,
            quantity: delta_transaction
          })
        }
        t.update(itemRef, {
          branch_quantities: item_branch_quantities,
          branch_ids: item_branch_ids
        })
      })
  })
  item_update_transactions.push(p)
});
return Promise.all(item_update_transactions)
}) // end of the onCreate handler

function isLineTransIncrease(line_trans: number): boolean {
  return (line_trans === 1) || (line_trans === 2)
}
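Because the transaction callback can re-run on contention, keep side effects such as logging out of the callback and attach them to the returned promise instead. A minimal sketch (recompute is a hypothetical stand-in for the quantity math above):

let attempts = 0
const p = db.runTransaction(async (t) => {
  attempts++ // may execute more than once if the document changes underneath us
  const doc = await t.get(itemRef)
  t.update(itemRef, { branch_quantities: recompute(doc) })
})
p.then(() => console.log(`committed after ${attempts} attempt(s)`)) // runs exactly once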
I am giving worker threads in JS a try, and I made a simple sort comparison using the built-in JS sort function. The comparison just uses an async function which sorts 60000 random numbers. The first version sorts the random numbers the way we traditionally do it:
async function normalSort(arr) {
  return new Promise((res) => {
    let copy = arr;
    copy.sort((a, b) => a > b ? 1 : -1);
    return res(copy)
  })
}
The other is a plain function which is called from a workersHandler function:
const { Worker, parentPort, workerData } = require('worker_threads');

function sort(data) {
  let copy = data;
  copy.sort((a, b) => a > b ? 1 : -1);
  parentPort.postMessage(copy)
  process.exit();
}

sort(workerData);
The workers handler function:
const os = require('os');
const path = require('path');
const { Worker } = require('worker_threads');

async function workersHandler(arr) {
  const startTime = Date.now();
  const cpusAmount = os.cpus().length;
  const chSize = Math.ceil(arr.length / cpusAmount)
  let promises = [];
  for (let i = 0; i < arr.length; i += chSize) {
    const end = i + chSize;
    const currentChunk = arr.slice(i, end);
    const promise = new Promise((res, rej) => {
      //@ts-ignore
      const worker = new Worker(path.join(__dirname, '..', '/utils/sort.js'), { workerData: currentChunk })
      worker.on('message', res)
      worker.on('error', rej)
    })
    promises.push(promise);
  }
  let result = await Promise.all(promises)
  return result;
}
And the main function, which calls the other functions:
function main() {
  let arr = new Array(60000).fill(0).map((_, i) => Math.round(Math.random() * 100));
  const startTime = Date.now();
  workersHandler(arr).then(r => console.log('workers sort', Date.now() - startTime + ' ms'))
  normalSort(arr).then(r => console.log('normal sort', Date.now() - startTime + ' ms'))
}

main();
Surprisingly, the normal sort function is way faster, even though it runs in a single thread: I get 101 ms for the workers version and 53 ms for the normal sort.
Could someone explain these odd results? Are workers not that fast, or am I making a wrong implementation?
Basically, using a single worker thread and waiting for it to do the work will always be slower than doing the work in the local thread, because:
Creating threads takes time.
Sending data between threads takes time.
Where you might get gains is if you have isolated pieces of work that can be handled in parallel, and multiple CPU cores to work with. In that situation, you can send different pieces of work to multiple workers (up to as many CPU cores as are available), provided the work isn't constrained by some other single resource they'd all be competing for.
Below I've posted a program that sorts 12 arrays locally and via workers, with repeated races. (When sorting in workers, it transfers the array data to the worker and then back rather than copying it.) It starts the workers in advance and reuses them, but it includes the startup time when determining the average time the workers took to do their work, so we're including all overhead.
On my workstation, with four CPU cores and letting it have a worker for each core, workers easily win:
# of workers: 4
Local average: 8790.010573029518ms
Workers' average: 3550.658817946911ms
Workers win, taking 40.39425% of the time local did
If I limit it to one worker, though, the worker is pure overhead and the local thread wins:
# of workers: 1
Local average: 8907.022233068943ms
Workers' average: 8953.339844942093ms
Local wins, taking 99.48268% of the time workers did
Even just two workers wins, because they can work in parallel on this multi-core machine:
# of workers: 2
Local average: 8782.853852927685ms
Workers' average: 4754.60275799036ms
Workers win, taking 54.13505% of the time local did
On a single core machine (if you can find one anymore), those two workers would be pure overhead again, and the local thread would win.
Here's main.js:
const os = require('os');
const { Worker } = require('worker_threads');
const { performance } = require('perf_hooks');

const MAX_UINT32 = (2**32)-1;
const ARRAY_SIZE = 100000;
const ARRAY_COUNT = 12;

const workerCount = +process.argv[2] || os.cpus().length;
const raceCount = +process.argv[3] || 5;

class WorkerQueue {
  #workers;
  #available;
  #pending;
  #checkPending = () => { // private methods still aren't unflagged in v13, so...
    if (this.#available.length && this.#pending.length) {
      const resolve = this.#pending.shift();
      const worker = this.#available.shift();
      resolve(worker);
    }
  };
  constructor(...workers) {
    this.#workers = new Set(workers);
    this.#available = [...this.#workers];
    this.#pending = [];
  }
  get() {
    return new Promise(resolve => {
      this.#pending.push(resolve);
      this.#checkPending();
    });
  }
  release(worker) {
    if (!this.#workers.has(worker)) {
      throw new Error("Unknown worker");
    }
    this.#available.push(worker);
    this.#checkPending();
  }
  terminate() {
    for (const worker of this.#workers) {
      worker.terminate();
    }
    this.#workers = new Set();
    this.#available = [];
    this.#pending = [];
  }
}

const {workers, workerCreationTime} = createWorkers();
main();

function createWorkers() {
  const start = performance.now();
  const workers = new WorkerQueue(
    ...Array.from({length: workerCount}, () => new Worker("./worker.js"))
  );
  const workerCreationTime = performance.now() - start;
  return {workers, workerCreationTime};
}

async function main() {
  try {
    console.log(`Workers: ${workerCount} (in ${workerCreationTime}ms), races: ${raceCount}`);
    let localAverage = 0;
    let workersAverage = 0;
    for (let n = 1; n <= raceCount; ++n) {
      console.log(`Race #${n}:`);
      const {localTime, workersTime} = await sortRace();
      localAverage += localTime;
      workersAverage += workersTime;
    }
    // Include the time it took to create the workers in the workers' average, as
    // though we'd created them for each race. (We didn't because doing so would
    // have given the local thread an advantage: after the first race, it's warmed
    // up, but a new worker would be cold. So we let the workers be warm but add
    // the full creation time into each race.)
    workersAverage += workerCreationTime;
    console.log("----");
    console.log(`# of workers: ${workerCount}`);
    console.log(`Local average: ${localAverage}ms`);
    console.log(`Workers' average: ${workersAverage}ms`);
    if (localAverage > workersAverage) {
      showWinner("Workers win", "local", workersAverage, localAverage);
    } else {
      showWinner("Local wins", "workers", localAverage, workersAverage);
    }
    workers.terminate();
  } catch (e) {
    console.error(e.message, e.stack);
  }
}

function showWinner(msg, loser, winnerAverage, loserAverage) {
  const percentage = (winnerAverage * 100) / loserAverage;
  console.log(`${msg}, taking ${percentage.toFixed(5)}% of the time ${loser} did`);
}

async function sortRace() {
  // Create a bunch of arrays for local to sort
  const localArrays = Array.from({length: ARRAY_COUNT}, () => createRandomArray(ARRAY_SIZE));
  // Copy those arrays so the workers are dealing with the same values
  const workerArrays = localArrays.map(array => new Uint32Array(array));
  const localStart = performance.now();
  const localResults = await Promise.all(localArrays.map(sortLocal));
  const localTime = performance.now() - localStart;
  checkResults(localResults);
  console.log(`Local time: ${localTime}ms`);
  const workerStart = performance.now();
  const workersResults = await Promise.all(workerArrays.map(sortViaWorker));
  const workersTime = performance.now() - workerStart;
  checkResults(workersResults);
  console.log(`Workers' time: ${workersTime}ms`);
  return {localTime, workersTime};
}

async function sortLocal(array) {
  await Promise.resolve(); // To make it start asynchronously, like `sortViaWorker` does
  array.sort((a, b) => a - b);
  return array;
}

async function sortViaWorker(array) {
  const worker = await workers.get();
  return new Promise(resolve => {
    worker.once("message", result => {
      workers.release(worker);
      resolve(result.array);
    });
    worker.postMessage({array}, [array.buffer]);
  });
}

function checkResults(arrays) {
  for (const array of arrays) {
    const badIndex = array.findIndex((value, index) => index > 0 && array[index-1] > value);
    if (badIndex !== -1) {
      throw new Error(
        `Error, array entry ${badIndex} has value ${array[badIndex]} ` +
        `which is < previous value ${array[badIndex-1]}`
      );
    }
  }
}

function createRandomArray(length) {
  // the Uint32Array constructor takes a length in elements, not bytes
  const array = new Uint32Array(length);
  return randomFillArray(array);
}

function randomFillArray(array) {
  for (let length = array.length, i = 0; i < length; ++i) {
    array[i] = Math.random() * MAX_UINT32;
  }
  return array;
}
and worker.js:
const { parentPort } = require("worker_threads");

parentPort.on("message", ({array}) => {
  array.sort((a, b) => a - b);
  parentPort.postMessage({array}, [array.buffer]);
});
60000 elements may not be enough for workers to pay off; IPC times matter.
Btw, on IPC: generic JavaScript datatypes, including generic JS arrays, are heavy when copied to workers, but there are binary array types: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray
postMessage() has a transfer argument, but it applies to a limited number of types only.
https://nodejs.org/api/worker_threads.html#worker_threads_port_postmessage_value_transferlist and https://developer.mozilla.org/en-US/docs/Web/API/Worker/postMessage:
postMessage(value[, transferList])
node: transferList may be a list of ArrayBuffer and MessagePort objects. After transferring, they will not be usable on the sending side of the channel anymore (even if they are not contained in value).
MDN: An optional array of Transferable objects to transfer ownership of. If the ownership of an object is transferred, it becomes unusable (neutered) in the context it was sent from and becomes available only to the worker it was sent to. Transferable objects are instances of classes like ArrayBuffer, MessagePort or ImageBitmap objects that can be transferred.
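A minimal Node sketch of the transfer list in action (not from the original posts), reusing the worker.js shown in the previous answer:

const { Worker } = require("worker_threads");

const data = new Uint32Array(8).map(() => (Math.random() * 0x100000000) >>> 0);
const worker = new Worker("./worker.js");
worker.once("message", ({ array }) => {
  console.log("sorted:", array.join(", "));
  worker.terminate();
});
// listing data.buffer in the transfer list moves the buffer instead of copying it;
// `data` is detached (length 0) on this side afterwards
worker.postMessage({ array: data }, [data.buffer]);
console.log("after transfer, data.length =", data.length); // 0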
Effect of types:
let typ = prompt("Type: 0/1/2/3 (Array/Float64Array/Float32Array/Uint32Array)");
let len = parseInt(prompt("Length"));
let basearray;
switch (typ) {
  case "1": basearray = new Float64Array(len); break;
  case "2": basearray = new Float32Array(len); break;
  case "3": basearray = new Uint32Array(len); break;
  default: basearray = new Array(len); break;
}
for (let i = 0; i < basearray.length; i++)
  basearray[i] = Math.random() * 0x1000000;
let cpus = 4,
  chunksize = basearray.length / cpus,
  chunks = [], chunksw = [];
for (let i = 0; i < cpus; i++)
  chunksw[i] = (chunks[i] = basearray.slice(i * chunksize, (i + 1) * chunksize)).slice();
let start = Date.now();
for (let i = 0; i < cpus; i++)
  chunks[i].sort((a, b) => a - b);
console.log("Seq:", Date.now() - start);
let code = "onmessage=event=>postMessage(event.data.sort((a,b)=>a-b));";
let ws = [], cnt = 0;
for (let i = 0; i < cpus; i++) {
  ws[i] = new Worker("data:text/plain," + escape(code));
  let j = i;
  ws[i].onmessage = event => {
    chunksw[j] = event.data;
    if (++cnt === cpus) {
      console.log("Par:", Date.now() - start);
      if (len <= 20)
        for (let i = 0; i < cpus; i++)
          console.log(chunks[i], chunksw[i]);
    }
  };
}
start = Date.now();
for (let i = 0; i < cpus; i++)
  ws[i].postMessage(chunksw[i]);
Specify a length divisible by 4. If the length is 20 or less, the resulting sorted chunks are also logged for verification. Plain JS Arrays are reliably slower for me when passed around (compared to the thread-less run), regardless of whether they contain 20 or 6000000 elements (a 6-million-element JS array runs for 8 seconds on my older laptop, so it may be safer to start with something smaller). The other types are faster when threaded, with Uint being the fastest.
Actually, anything that is not 1/2/3 results in a JS Array (the slowest one), including the empty string.
The effect of transfer is not as spectacular, but it already shows up from the beginning (with 4 elements it is 59-69 ms vs 20-22 ms on my PC):
let typ = prompt("Type: 0/1/2 (Float64Array/Float32Array/Uint32Array)");
let len = parseInt(prompt("Length"));
let basearray;
switch (typ) {
  case "1": basearray = new Float32Array(len); break;
  case "2": basearray = new Uint32Array(len); break;
  default: basearray = new Float64Array(len);
}
for (let i = 0; i < basearray.length; i++)
  basearray[i] = Math.random() * 0x1000000;
let cpus = 4,
  chunksize = basearray.length / cpus,
  chunksw = [], chunkswt = [];
for (let i = 0; i < cpus; i++)
  chunkswt[i] = (chunksw[i] = basearray.slice(i * chunksize, (i + 1) * chunksize)).slice();
let start;
let code = "onmessage=event=>postMessage(event.data.sort((a,b)=>a-b));";
let ws = [], cnt = 0;
for (let i = 0; i < cpus; i++) {
  ws[i] = new Worker("data:text/plain," + escape(code));
  let j = i;
  ws[i].onmessage = event => {
    chunksw[j] = event.data;
    if (++cnt === cpus) {
      console.log("Non-transfer:", Date.now() - start);
      // launch transfer measurement
      cnt = 0; start = Date.now();
      for (let i = 0; i < cpus; i++)
        wst[i].postMessage(chunkswt[i].buffer, [chunkswt[i].buffer]);
    }
  };
}
let codet;
switch (typ) {
  case "1":
    codet = "onmessage=event=>{" +
      "let arr=new Float32Array(event.data);" +
      "arr.sort((a,b)=>a-b);" +
      "postMessage(event.data,[event.data]);};";
    break;
  case "2":
    codet = "onmessage=event=>{" +
      "let arr=new Uint32Array(event.data);" +
      "arr.sort((a,b)=>a-b);" +
      "postMessage(event.data,[event.data]);};";
    break;
  default:
    codet = "onmessage=event=>{" +
      "let arr=new Float64Array(event.data);" +
      "arr.sort((a,b)=>a-b);" +
      "postMessage(event.data,[event.data]);};";
}
let wst = [];
for (let i = 0; i < cpus; i++) {
  wst[i] = new Worker("data:text/plain," + escape(codet));
  let j = i;
  wst[i].onmessage = event => {
    switch (typ) {
      case "1": chunkswt[j] = new Float32Array(event.data); break;
      case "2": chunkswt[j] = new Uint32Array(event.data); break;
      default: chunkswt[j] = new Float64Array(event.data);
    }
    if (++cnt === cpus) {
      console.log("Transfer:", Date.now() - start);
      if (len <= 20)
        for (let i = 0; i < cpus; i++)
          console.log(chunksw[i], chunkswt[i]);
    }
  };
}
// launch non-transfer measurement
start = Date.now();
for (let i = 0; i < cpus; i++)
  ws[i].postMessage(chunksw[i]);
This code is a bit messy because it is the buffer that can be transferred, not the typed arrays themselves; also, while the second measurement is initialized as a direct copy-paste (which already isn't pretty), it is launched from inside the completion function of the first one.
(I do not wish to provide exact measurement results because my PC is doing other things too. Just run the snippets a couple of times with varied, or even repeated, parameters.)
I have upvote and downvote functions which perform transactions and correctly manipulate the vote count in my database of sports player names.
Votes are cast as 1's and -1's. The math is then done to total each player's vote count, which is stored in the database as votes.
Each time a vote is cast, I would like a function or piece of code to look through all the names in players and assign a number to each name reflecting their rank among everyone in the database, based on their votes from most to least. I.e., James has 10 upvotes and 0 downvotes (votes = 10), so he's rank 1; John has 10 upvotes and 1 downvote (votes = 9) and is rank 2. If I upvote John, I should refresh the page and see them tied at 1. This works to a degree with my current code below, but once I start adding more names via the input and do some upvoting, downvoting, and retracting of votes, the voteCount variable gets all whacky and the ranks go way off course. I'm sure there's an easier and/or better way to do this.
orderedPlayersRank is an array that sorts the players by votes, best first and worst last, so my #1-ranked person should always be first in the orderedPlayersRank array.
Global vars:
let prevPlayerVotes = 0
let rankCount = 1

// RANKING CODE
// orderedPlayersRank sorts players from highest votes to lowest
orderedPlayersRank.map((player) => {
  this.database.child(player.id).transaction(function(player) {
    if (player.votes >= prevPlayerVotes) {
      prevPlayerVotes = player.votes
      player.rank = rankCount
    } else if (player.votes < prevPlayerVotes) {
      rankCount++
      player.rank = rankCount
      prevPlayerVotes = player.votes
    } else {
      console.log("Rank calculation error.")
    }
    return player;
  })
})
Here's my complete upvote function for reference. I'm putting the above code where I have the //ranking functionality comment toward the bottom, so the ranking code runs any time a valid vote is cast. I would put the same code in the downvote function as well.
upvotePlayer(playerId) {
  const players = this.state.players;
  const orderedPlayersRank = _.orderBy(players, ['votes'], ['desc'])
  if (this.state.user) {
    let ref = firebase.database().ref('/players/' + playerId + '/voters');
    ref.once('value', snap => {
      var value = snap.val()
      if (value !== null) {
        ref.child(this.uid).once('value', snap => {
          if (snap.val() === 0 || snap.val() == null) {
            ref.child(this.uid).set(1);
            this.database.child(playerId).transaction(function(player) {
              if (player) {
                player.votes++
              }
              return player;
            })
          } else if (snap.val() === -1) {
            ref.child(this.uid).set(1);
            // Added vote balancing
            this.database.child(playerId).transaction(function(player) {
              if (player) {
                player.votes++
                player.votes++
              }
              return player;
            })
          } else if (snap.val() === 1) {
            ref.child(this.uid).set(0);
            // Added vote balancing
            this.database.child(playerId).transaction(function(player) {
              if (player) {
                player.votes--
              }
              return player;
            })
          } else {
            console.log("Error in upvoting. snap.val(): " + snap.val())
          }
        })
      } else {
        ref.child(this.uid).set(1);
        this.alertUpVote()
        // Added vote balancing
        this.database.child(playerId).transaction(function(player) {
          if (player) {
            player.votes++
            console.log("Player added")
          }
          return player;
        })
      }
    });
    // ranking functionality here
  } else {
    this.alertNotLoggedIn()
    console.log("Must be logged in to vote.")
  }
}
As I said, the upvote function is working fine; I'm just looking for advice on the ranking feature I'm struggling with. I appreciate any help and can supply any other relevant code.
Transactions can run multiple times before completing if the data changes before the transaction resolves. This can cause variables outside their scope (i.e. rankCount and prevPlayerVotes) to fall out of sync. Another issue is that you are looping over orderedPlayersRank and getting back a Promise from each call to transaction, which causes prevPlayerVotes and rankCount to be read/modified concurrently instead of sequentially, as I assume you are expecting.
One solution could be to use orderByChild('votes') on the list and use the index, paired with a check of the previous value, to determine rank at display time, or to set the rank whenever votes changes (either by a Firebase Function or a watcher).
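A minimal sketch of the display-time variant (not part of the original answer), assuming the players node from the question and the web SDK:

firebase.database().ref('players').orderByChild('votes').once('value', (snap) => {
  const ranked = [];
  // orderByChild returns ascending order, so collect and reverse
  snap.forEach((playerSnap) => {
    ranked.unshift({ id: playerSnap.key, ...playerSnap.val() });
  });
  let rank = 0;
  let prevVotes = null;
  ranked.forEach((player, i) => {
    if (player.votes !== prevVotes) { // players with equal votes share a rank
      rank = i + 1;
      prevVotes = player.votes;
    }
    player.rank = rank;
  });
  console.log(ranked); // best player first, each entry carrying its rank
});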
Ex. (Firebase Function)
export var rank = functions.database.ref('players/{playerId}/votes')
  .onUpdate((change, context) => {
    // list by 'votes' in ascending order
    var orderedListRef = change.after.ref.root.child('players').orderByChild('votes')
    var oldVotes = change.before.val()
    var newVotes = change.after.val()
    var notChanged = 0
    var changeRank = 0
    // went higher in the list so bump every player passed by 1
    if (newVotes > oldVotes) {
      // Range: [oldVotes, newVotes]
      orderedListRef = orderedListRef.startAt(oldVotes).endAt(newVotes)
      changeRank = 1
      notChanged = newVotes
    } else { // went lower in the list so bump every player passed by -1
      // Range: [newVotes, oldVotes]
      orderedListRef = orderedListRef.startAt(newVotes).endAt(oldVotes)
      changeRank = -1
      notChanged = oldVotes
    }
    return orderedListRef.once('value')
      .then((ss) => {
        var promises = []
        var playersPassed = 0
        // IMPORTANT: must use `forEach` to ensure proper order
        ss.forEach((playerSS) => {
          if (playerSS.key === context.params.playerId) {
            return
          }
          playersPassed += 1
          if (playerSS.child('votes').val() === notChanged) {
            return
          }
          // use transaction to ensure proper number of bumps if multiple changes at once
          promises.push(playerSS.child('rank').ref.transaction((rank) => {
            return rank + changeRank
          }))
        })
        // use transaction to adjust rank by players passed
        promises.push(change.before.ref.parent.child('rank')
          .transaction((rank) => {
            return rank - playersPassed * changeRank
          }))
        return Promise.all(promises)
      })
  })
Initialization example
export var initRank = functions.database.ref('players/{playerId}/votes')
  .onCreate((snapshot, context) => {
    // list by 'votes' in ascending order
    return Promise.all([
      snapshot.ref.root
        .child('players')
        .orderByChild('votes')
        .startAt(snapshot.val())
        .once('value')
        .then((ss) => {
          return snapshot.ref.parent.child('rank').transaction((rank) => {
            if (rank) {
              return rank + ss.numChildren() // numChildren is a method
            }
            return ss.numChildren()
          })
        }),
      snapshot.ref.root
        .child('players')
        .orderByChild('votes')
        .endAt(snapshot.val() - 1)
        .once('value')
        .then((ss) => {
          var promises = []
          ss.forEach((playerSS) => {
            promises.push(playerSS.child('rank').ref.transaction((rank) => {
              if (rank) {
                return rank + 1
              }
            })) // closing paren for push() was missing
          })
          return Promise.all(promises)
        })
    ])
  })
With this approach, you will need to set the rank of newly created players to the highest rank. Hope this helps!