I am implementing a selection algorithm that selects an object based on a probability proportional to its score value. This makes higher-scoring objects more likely to be chosen.
My implementation is as follows:
var pool = [];
for (var i = 0; i < 100; i++)
    pool.push({ Id: i, Score: Math.random() * 100 << 0 });

const NUM_RUNS = 100000;
var start = new Date();
for (var i = 0; i < NUM_RUNS; i++)
    rouletteSelection(pool);
var end = new Date();

var runningTime = (end.getTime() - start.getTime()) / 1000;
var avgExecutionTime = (runningTime / NUM_RUNS) * Math.pow(10, 9);
console.log('Running Time: ' + runningTime + ' seconds');
console.log('Avg. Execution Time: ' + avgExecutionTime + ' nanoseconds');
function rouletteSelection(pool) {
    // Sum all scores and normalize by shifting the range to a minimum of 0
    var sumScore = 0, lowestScore = 0;
    pool.forEach((obj) => {
        sumScore += obj.Score;
        if (obj.Score < lowestScore)
            lowestScore = obj.Score;
    });
    sumScore += Math.abs(lowestScore * pool.length);

    var rouletteSum = 0;
    var random = Math.random() * sumScore << 0;
    for (var i in pool) {
        // subtract the (non-positive) lowest score so every entry is >= 0
        rouletteSum += pool[i].Score - lowestScore;
        if (random < rouletteSum)
            return pool[i];
    }

    // Failsafe
    console.warn('Could not choose roulette winner, selecting random');
    return pool[Math.random() * pool.length << 0];
}
When run, the performance isn't bad: on my machine, each call to rouletteSelection() takes about 2500-3200 nanoseconds. However, before using it in production, I want to optimize it and shave off as much overhead as I can, since this function could potentially be called tens of millions of times in extreme cases.
An obvious optimization would be to somehow merge everything into a single loop so the object array is only iterated over once. The problem is, in order to normalize the scores (negative scores are shifted to 0), I need to know the lowest score to begin with.
Does anyone have any idea as to how to get around this?
At the risk of stating the obvious: just don't do the normalisation in every call to rouletteSelection. Do it once, after you constructed the pool.
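Here is a minimal sketch of that suggestion (the normalizePool helper and the cached sumScore property are illustrative names, not from the original post): shift the scores once after the pool is built, so rouletteSelection only needs its single accumulation loop.

// One-time normalization: shift all scores so the minimum is 0
// and cache the total on the pool.
function normalizePool(pool) {
    var lowest = 0;
    for (var i = 0; i < pool.length; i++)
        if (pool[i].Score < lowest) lowest = pool[i].Score;
    var sum = 0;
    for (var i = 0; i < pool.length; i++) {
        pool[i].Score -= lowest; // every score is now >= 0
        sum += pool[i].Score;
    }
    pool.sumScore = sum;
    return pool;
}

// Per-call work is now a single pass with no normalization.
function rouletteSelection(pool) {
    var random = Math.random() * pool.sumScore;
    var rouletteSum = 0;
    for (var i = 0; i < pool.length; i++) {
        rouletteSum += pool[i].Score;
        if (random < rouletteSum)
            return pool[i];
    }
    return pool[pool.length - 1]; // guard against floating-point edge cases
}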
I am trying to find an efficient way to go through a large amount of data to determine how many units are processed at once.
So the data that I am receiving are just simple pairs:
{timestamp: *, solvetime: *}
What I need, is to see how many things are processed at each second.
To help you visualize what I mean: here is an example of data that I receive:
[{timestamp: 5, solvetime: 3}, {timestamp: 7, solvetime: 5}, {timestamp: 8, solvetime: 2}, {timestamp: 12, solvetime: 10}, {timestamp: 14, solvetime: 7}]
The chart below should help you understand how it looks in time:
https://i.stack.imgur.com/LEIhW.png
This is a simple case where the final calculation contains every second, but if the timeframe is much wider I show only 205 different points across it. E.g. if the time between the first and the last timestamp is 20500 seconds, I would calculate the usage for every second, divide the time into 205 parts of 100 seconds each, and show only the second with the highest usage in each part.
What I am doing right now is iterating through all the pairs of input data and creating a map of all the seconds. Once I have it, I go through this map again to find the highest usage in each of the 205 time periods I divide the whole timeframe into, and append it to a map of 205 timestamps.
It works correctly, but it's very, very slow, and I feel like there is some better way to do it. A table might be faster, but it is still not too efficient, is it?
Here is the actual code that does it:
// results contain all the timestamps and solvetimes
// Timeframe of the chart
var start = Math.min.apply(Math, msgDetailsData.results.map((o) => { return o.timestamp; }));
var end = Math.max.apply(Math, msgDetailsData.results.map((o) => { return o.timestamp; }));

// Map of all seconds in the desired range (keys); the values count processes running in a given second
let mapOfSecondsInRange = new Map();
for (let i = start; i <= end; i++) {
    mapOfSecondsInRange.set(i, 0);
}

// We go through every process and add +1 to the value of each second in which the task was active
for (let element of msgDetailsData.results) {
    var singleTaskStart = element.timestamp - Math.ceil(element.solvetime);
    if (singleTaskStart < start) {
        for (let i = singleTaskStart; i < start; i++) {
            mapOfSecondsInRange.set(i, 0);
        }
        start = singleTaskStart;
    }
    for (let i = singleTaskStart; i < element.timestamp; i++) {
        mapOfSecondsInRange.set(i, mapOfSecondsInRange.get(i) + 1);
    }
}
// Preparation for the final map - all the seconds in the range divided into 205 parts.
const numberOfPointsOnChart = 205;
var numberOfSecondsForEachDataPoint = Math.floor((end - start) / numberOfPointsOnChart) + 1;
var leftoverSeconds = ((end - start) % numberOfPointsOnChart) + 1;
var highestUsageInGivenTimeframe = 0;
var timestampOfHighestUsage = 0;
let mapOfXXXDataPoints = new Map();
var currentElement = start;

for (let i = 0; i < numberOfPointsOnChart; i++) {
    if (leftoverSeconds === 0) {
        numberOfSecondsForEachDataPoint = numberOfSecondsForEachDataPoint - 1;
    }
    if (currentElement <= end) {
        for (let j = 0; j < numberOfSecondsForEachDataPoint; j++) {
            if (j === 0) {
                highestUsageInGivenTimeframe = mapOfSecondsInRange.get(currentElement);
                timestampOfHighestUsage = currentElement;
            }
            else if (mapOfSecondsInRange.get(currentElement) > highestUsageInGivenTimeframe) {
                highestUsageInGivenTimeframe = mapOfSecondsInRange.get(currentElement);
                timestampOfHighestUsage = currentElement;
            }
            currentElement = currentElement + 1;
        }
        mapOfXXXDataPoints.set(timestampOfHighestUsage, highestUsageInGivenTimeframe);
        leftoverSeconds = leftoverSeconds - 1;
    }
}
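One way to avoid touching every covered second once per task is a difference array: record +1 where a task becomes active and -1 where it stops, then recover per-second usage with a single prefix-sum pass. This is a hedged sketch, not code from the original post; usagePerSecond is an illustrative name.

function usagePerSecond(results) {
    // Find the overall timeframe, including seconds before the first timestamp.
    var start = Infinity, end = -Infinity;
    for (var i = 0; i < results.length; i++) {
        start = Math.min(start, results[i].timestamp - Math.ceil(results[i].solvetime));
        end = Math.max(end, results[i].timestamp);
    }
    // +1 where a task starts, -1 where it ends.
    var diff = new Int32Array(end - start + 2);
    for (var i = 0; i < results.length; i++) {
        var taskEnd = results[i].timestamp;
        var taskStart = taskEnd - Math.ceil(results[i].solvetime);
        diff[taskStart - start] += 1;
        diff[taskEnd - start] -= 1;
    }
    // A prefix sum turns the deltas into per-second usage counts.
    var usage = new Int32Array(end - start + 1);
    var running = 0;
    for (var s = 0; s < usage.length; s++) {
        running += diff[s];
        usage[s] = running; // tasks active during second (start + s)
    }
    return { start: start, usage: usage };
}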
I want to generate a unique 5-digit ID prefixed with 784. The constraint: I can execute the script only once, and I have to avoid the first 100 numbers, so it can't be 00100 or lower. Since I use a timestamp and can execute my script only once, how can I handle this?
I did this; it's maybe dumb, but at least I tried.
ConcatedID();

function ConcatedID()
{
    var uniqID = GenerateUniqueID();
    if (checkProtectedRange(uniqID))
    {
        var BarcodeID = '784' + uniqID;
        return BarcodeID;
    }
    else
        return ConcatedID(); // retry with a freshly generated ID
}

function checkProtectedRange(uniqueID)
{
    // Reject IDs in the protected range 00000-00100
    var checkRangeINT = parseInt(uniqueID, 10);
    if (checkRangeINT <= 100)
        return (false);
    else
        return (true);
}

function GenerateUniqueID()
{
    var lengthID = 5;
    var timestamp = +new Date();
    var ts = timestamp.toString();
    var parts = ts.split("").reverse();
    var id = "";
    var min = 0;
    var max = parts.length - 1;
    for (var i = 0; i < lengthID; ++i)
    {
        var index = Math.floor(Math.random() * (max - min + 1)) + min;
        id += parts[index];
    }
    gs.log('Generate ID ' + id);
    return id;
}
Without being able to track previously used IDs, you're left with chance to prevent duplicates. Your shenanigans with Date don't really change that. See the birthday problem.
Given that, just follow the most straightforward method: generate a random string consisting of five digits.
function GenerateUniqueID() {
    return ('0000' + ((Math.random() * (100000 - 101) + 101) | 0)).slice(-5);
}
Or, if you want just the final integer with constraints applied:
function GenerateUniqueID() {
return (Math.random() * (78500000 - 78400101) + 78400101)|0;
}
I just took the Codility tape equilibrium test here.
As you can see from my score, I didn't do well on how fast the function executes. Can anybody give me some pointers so I can optimise this code and get closer to 100%?
Here is the code...
function solution(A) {
    var minimumAbsDiff = null;
    for (var currentIndex = 1; currentIndex < A.length; currentIndex++) {
        var bottomHalf = getTotal(0, currentIndex - 1, A);
        var topHalf = getTotal(currentIndex, A.length - 1, A);
        var absDiff = Math.abs(bottomHalf - topHalf);
        if (minimumAbsDiff == null) {
            minimumAbsDiff = absDiff;
        } else {
            if (absDiff < minimumAbsDiff) minimumAbsDiff = absDiff;
        }
    }
    return minimumAbsDiff;
}

function getTotal(start, end, arrayTocheck) {
    var total = 0;
    for (var currentIndex = start; currentIndex <= end; currentIndex++) {
        total = total + arrayTocheck[currentIndex];
    }
    return total;
}
You don't want to optimise constant-factor speed. You want to lower the algorithmic complexity. Your current algorithm is O(n²), while the task description explicitly stated that
expected worst-case time complexity is O(N);
expected worst-case space complexity is O(N), beyond input storage
(not counting the storage required for input arguments).
So what's the insight that makes this possible? Adjacent splits differ by only a constant amount of work: if you compare the value |(A[0] + ... + A[P-1]) - (A[P] + ... + A[N-1])| for P and P+1, the only change is that A[P] moves from the right-hand sum to the left-hand sum.
function solution(A) {
var left = 0,
right = A.reduce(function(a, b) { return a + b; }, 0);
var min = Infinity;
for (var p = 0; p<A.length-1; p++) {
left += A[p];
right -= A[p];
min = Math.min(min, Math.abs(left - right));
}
return min;
}
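A quick sanity check with the example array from the task description:

console.log(solution([3, 1, 2, 4, 3])); // 1, from the split [3, 1, 2] | [4, 3]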
Dygraphs options provide 'rollPeriod' to support rolling averages and 'stepPlot' to support step plots. When set together while some data is missing in between, they give very unexpected results. For example, the attached image link shows the graph for the original data (rollPeriod=1) and for rollPeriod=5. (http://imgur.com/a/9ajh8)
At 40,000, for example, the rolling average must be zero. But dygraphs takes the average of the last 5 data points instead of the last 5 seconds.
Is it possible to get a rolling average that maintains a notion of time rather than data points? Thanks in advance!
PS- Sorry for the image link. SO won't allow me to directly post images due to lack of reputation. :(
As you noticed, dygraphs averages the last five data points, not the last five seconds. This is all it can do, since it doesn't know the cadence of your data. Fortunately, you can fix this by adding explicit missing values:
Datetime,Value
2014-08-01 12:34:55,0
2014-08-01 12:34:56,
2014-08-01 12:34:57,0
2014-08-01 12:34:58,
2014-08-01 12:34:59,0
The zeros are values, the blanks are missing values.
See http://dygraphs.com/data.html for more information, or one of these two demos for examples.
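If you load the data as native arrays instead of CSV, the same trick applies: dygraphs treats null as a missing value there. A hedged sketch (padWithMissing is an illustrative helper, not a dygraphs API) that inserts a null row at every missing step:

// Pad a [Date, value] series with explicit nulls at each missing step
// so rollPeriod averages over time instead of over existing points.
function padWithMissing(rows, stepMs) {
    var padded = [];
    for (var i = 0; i < rows.length; i++) {
        if (i > 0) {
            for (var t = rows[i - 1][0].getTime() + stepMs; t < rows[i][0].getTime(); t += stepMs) {
                padded.push([new Date(t), null]); // null marks a missing value
            }
        }
        padded.push(rows[i]);
    }
    return padded;
}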
Due to the lack of this functionality, I implemented it myself. I am putting the code here for someone in a similar situation. The code uses the internal function extractSeries_ in the dygraph library, plus Queue.js. Use with extreme caution!
function calcAvg_(minDate, maxDate, dispData){
var windowSize = Math.round((maxDate-minDate)/100);
if(windowSize <= 1){
return dispData;
}
var energy = 0;
var lastS = new Queue();
var series = dispData;
var lastAvg = 0;
// Initially lastS elements are all 0
// lastS shall always be maintained of windowSize.
// Every enqueue after initialization in lastS shall be matched by a dequeue
for(var j=0; j<windowSize; j++){
lastS.enqueue(0);
}
var avg_series = [];
var prevTime = minDate - windowSize;
var prevVal = 0;
avg_series.push([prevTime, prevVal]);
//console.log( "calcAvg_ min: " + minDate + " max: " + maxDate + " win: " + windowSize );
for(j=0; j<series.length; j++){
var time = series[j][0];
var value = series[j].slice(1);
if(time > minDate){
var k = 0;
while(k < windowSize && prevTime + k < time){
var tail = lastS.dequeue();
lastS.enqueue(prevVal);
lastAvg = lastAvg + (prevVal - tail)/windowSize;
avg_series.push([prevTime+k, lastAvg]);
k++;
}
}
prevTime = time;
prevVal = value;
if(time > maxDate){
break;
}
}
if(j == series.length){
//console.log("Fix last value");
var k = 0;
while(k < windowSize && prevTime + k < maxDate){
var tail = lastS.dequeue();
lastS.enqueue(prevVal);
lastAvg = lastAvg + (prevVal - tail)/windowSize;
avg_series.push([prevTime+k, lastAvg]);
k++;
}
}
//console.log(avg_series);
avg_series.push([maxDate, 0]);
return avg_series;
}
var blockRedraw = false;
myDrawCallback_ = function(gs, initial) {
if (blockRedraw) return;
blockRedraw = true;
var range = gs.xAxisRange();
var yrange = gs.yAxisRange();
var series = calcAvg_(range[0], range[1],
gs.extractSeries_(gs.rawData_, 0, false));
gs.updateOptions( {
dateWindow: range,
valueRange: yrange,
file: series } );
blockRedraw = false;
}
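For reference, the callback is hooked up through dygraphs' documented drawCallback option; the element id and data variable here are illustrative:

var g = new Dygraph(document.getElementById('chart'), data, {
    drawCallback: myDrawCallback_
});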
I've built my own flip command and, well, it's slow and takes forever. I would like to know if JavaScript has a blit or memcpy style command. Right now I'm going through item by item with for loops to do a copy, and it takes "forever".
Here is an example use of my flip function. I'm running 3 layers, only 1 of which is full height, with 3 simple animations, and the FPS topped out at about 35. Ideally 3 layers should top out at a far, far higher FPS, in the 200+ range I would expect.
v:36.8 l0:36.8 l1:57.8 l2:36.8 The layers' FPS are the rendering to their buffers; the v is the rendering to the canvas with the flip function. (These FPS are from Chrome on a Mac.)
v = the screen update, the main flip function listed below.
l0 = The bottom fire, a full height layer
l1 = The static noise, a 1/2 height layer
l2 = The top fire, a 1/4 height layer
Imagine having 9 or 10 layers; the FPS would drop like a stone. In FF version 12 it's already unusable... not even double-digit FPS rates. Opera is at least double digits.
v:4.2 l0:4.2 l1:4.2 l2:4.2 (FF 12 OSX)
v:15.5 l0:15.5 l1:15.5 l2:15.5 (Opera latest OSX)
My flip function
flip : function() {
var fps = '';
// Combine the layers onto the back buffer
for (var l = 0; l < this.layers.length; l++)
{
fps += 'l' + l + ':' + this.layers[l].fps.toFixed(1) + ' ';
var layerWidth = this.layers[l].options.width;
var layerHeight = this.layers[l].options.height;
for (var x = 0; x < layerWidth; x++)
{
for (var y = 0; y < layerHeight; y++)
{
var index = (y*this.layers[l].options.width + x)*4;
var r = this.layers[l].buffer[index+0];
var g = this.layers[l].buffer[index+1];
var b = this.layers[l].buffer[index+2];
var a = this.layers[l].buffer[index+3];
if ((r | g | b | a) !== 0) { // parenthesized: r|g|b|a != 0 would parse as r|g|b|(a != 0)
this.buffer.data[index+0] = r;
this.buffer.data[index+1] = g;
this.buffer.data[index+2] = b;
this.buffer.data[index+3] = a;
}
}
}
}
fps = 'v:' + this.fps.toFixed(1) + ' ' + fps;
this.$fps.html(fps);
// blit the buffer
this.context.putImageData(this.buffer, 0, 0);
// Calculate fps
var now = new Date;
var thisFrameFPS = 1000 / (now - this.last);
this.fps += (thisFrameFPS - this.fps) / 50;
this.last = now;
var t = this;
setTimeout(function() {t.flip.apply(t);}, this.speed);
}
There is a memcpy.js that uses TypedArray.prototype.subarray() if available.
Browser support is good, and even IE10 has subarray.
function memcpy (src, srcOffset, dst, dstOffset, length) {
var i
src = src.subarray || src.slice ? src : src.buffer
dst = dst.subarray || dst.slice ? dst : dst.buffer
src = srcOffset ? src.subarray ?
src.subarray(srcOffset, length && srcOffset + length) :
src.slice(srcOffset, length && srcOffset + length) : src
if (dst.set) {
dst.set(src, dstOffset)
} else {
for (i=0; i<src.length; i++) {
dst[i + dstOffset] = src[i]
}
}
return dst
}
Your code can be improved, but I doubt that the speedup will be significant.
Here's what I came up with; note it is untested. I have assumed that the order of processing the layers is not significant; if it is, replace the first for loop with your version.
function flip () {
var fps = '';
// Combine the layers onto the back buffer
for (var l = this.layers.length; l--;) {
fps += 'l' + l + ':' + this.layers[l].fps.toFixed (1) + ' ';
var layerWidth = this.layers[l].options.width;
var layerHeight = this.layers[l].options.height;
for (var index = 0, x = layerWidth; x--;) {
for (var y = layerHeight; y--; index += 4) {
var r = this.layers[l].buffer[index+0];
var g = this.layers[l].buffer[index+1];
var b = this.layers[l].buffer[index+2];
var a = this.layers[l].buffer[index+3];
if ((r | g | b | a) !== 0) { // parenthesized: r|g|b|a != 0 would parse as r|g|b|(a != 0)
this.buffer.data[index+0] = r;
this.buffer.data[index+1] = g;
this.buffer.data[index+2] = b;
this.buffer.data[index+3] = a;
}
}
}
}
};
On the assumption that the r, g, b and a are all 8-bit quantities, you could consider packing them into a single int. That would reduce the processing in the inner loop. Even more efficient would be to use the new ArrayBuffer facilities.
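A hedged sketch of that idea, assuming the back buffer is an ImageData and each layer keeps a Uint32Array view over its pixel buffer (buffer32 is an illustrative name, not from the original post): the inner loop then does one comparison and one write per pixel. Because each word is only compared to 0 and copied whole, endianness doesn't matter here.

// One-time setup per layer (illustrative):
// layer.buffer32 = new Uint32Array(layer.buffer.buffer);
var dst32 = new Uint32Array(this.buffer.data.buffer);
for (var l = 0; l < this.layers.length; l++) {
    var src32 = this.layers[l].buffer32;
    for (var i = 0; i < src32.length; i++) {
        var px = src32[i];
        if (px !== 0) dst32[i] = px; // copy r, g, b, a with a single 32-bit write
    }
}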