Error in tensorflow.js multiple regression sgd minimize - javascript

This is my first post. I am facing an error when fitting a curve in tensorflow.js which I can't seem to fix. I have spent two days on it so far. Since tensorflow.js is pretty new, there aren't a whole lot of answers to this sort of question out there, so I'm sure many people are interested in this. I have tried to replicate the example from the tensorflow.js project's website:
https://js.tensorflow.org/tutorials/fit-curve.html .
The difference is that I am using multiple predictors to predict an outcome variable. I have 20 prices, and I am using the previous 4 prices to predict the fifth one. So I start out with price number 5 and go up to price number 20 where price 5 is predicted by price 1 to 4 and so forth in a weighted time-series prediction model. I am using a multiple linear regression framework where I set up 4 random parameters (one weight for each of the four previous prices). My goal is to train the variable to minimize my loss function (using minimum least square criterion). I have tried following the example from the link as closely as possible. Whenever I run my code I get:
Error: The f passed in variableGrads(f) must be a function
which is generated by the call of .minimize in line 59 (right before return in the train function at the end). Basically, what I'm doing is fitting a linear regression, which could be done more easily in R, but we are aiming at very large data sets and more complex machine learning procedures. I'm sure this is interesting to a lot of other people who are getting started with tensorflow.js.
here's my code with some comments:
const tf = require('#tensorflow/tfjs');
require('#tensorflow/tfjs-node');
module.exports = function tensorFlow() {
//the trainable variable with initial random numbers
let lag = tf.variable(tf.tensor([Math.random(), Math.random(), Math.random(), Math.random()], [4]));
//20 observed prices
let priceData = [21.00397, 21.29068, 22.80492, 23.40646, 24.06598, 23.89722, 25.40211, 24.63436, 25.83449, 26.44832, 26.25194, 27.34009, 27.90455, 27.14175, 28.12549, 29.99411, 30.43631, 30.39753, 30.16104, 31.14931];
//the prices from price 5 on that are to be predicted
let toBePredictedList = [24.06598, 23.89722, 25.40211, 24.63436, 25.83449, 26.44832, 26.25194, 27.34009, 27.90455, 27.14175, 28.12549, 29.99411, 30.43631, 30.39753, 30.16104, 31.14931];
//set up tensor of labels to compare predictions with
let toBePredicted = tf.tensor(toBePredictedList, [16]);
//a list of predictors with 16 rows and four columns for 16 predictions to be made using 4 previous prices each
let predictorsList = [];
for (let predictorIndex = 0; predictorIndex < 16; predictorIndex++) {
for (let predictionsIndex = 0; predictionsIndex < 4; predictionsIndex++) {
predictorsList.push(priceData[predictorIndex + predictionsIndex]);
}
}
//make it a tensor
let predictors = tf.tensor(predictorsList, [16, 4]);
//predict multiplies all predictors in all lines with the parameters from lag to be trained and adds up the four elements to generate an estimate of the fifth price
function predict(predictors) {
function modelMaker() {
let modelList = [];
for (let rowIndex = 0; rowIndex < 16; rowIndex++) {
let prediction = 0;
for (let colIndex = 0; colIndex < 4; colIndex++) {
prediction += lag.get(colIndex) * predictors.get(rowIndex, colIndex);
console.log({prediction});
}
modelList.push(prediction);
}
return tf.tensor(modelList, [16]);
}
return tf.tidy(modelMaker);
}
//means square error of my prediction when compared to actual outcome price
function loss(predictions, toBePredicted) {
return tf.losses.meanSquaredError(toBePredicted, predictions);
}
function train(predictors, toBePredicted, numIterations) {
function computeLoss (predictors, toBePredicted) {
let predictions = predict(predictors);
return loss(predictions, toBePredicted);
}
let learningRate = 0.5; //suggested by Google Developers
const OPTIMIZER = tf.train.sgd(learningRate); //suggested by Google Developers
for (let iter = 0; iter < numIterations; iter++) {
OPTIMIZER.minimize(computeLoss(predictors, toBePredicted));
}
return {
a: lag.get(0),
b: lag.get(1),
c: lag.get(2),
d: lag.get(3)
};
};
//75 suggested by google developers
return train(predictors, toBePredicted, 75);
};
The problem is with minimize at the end, as I said. The rest of the code above works fine and computes everything it is supposed to.
Thanks for any suggestions!
Chris

optimizer.minimize() updates the weights during each cycle of training. For the weights to be updated, they need to be created using tf.variable. A variable created using tf.variable is mutable, whereas tf.tensor creates an immutable tensor.
It is also worth pointing out that predict() should return a function whose coefficients are created using tf.variable, so that they can be updated to minimize the loss function.

Related

Why is my function returning a sparse array (javascript)?

I'm learning Javascript. This is a problem I'm stuck on:
"Using a for loop, calculate the percentage of students' test scores stored in the testScore array. The test has a total of 100. Store the percentages in another array and display it to the console."
This was my solution (I know there's "better" alternative methods of solving this, but since I'm just learning, I'd rather understand first what I'm doing wrong here.):
// Maximum number of marks obtainable on the test.
const TOTAL_MARKS = 100;
const testScore = [10, 40, 30, 25];
const percentages = [];

/**
 * Converts one test score into a percentage of TOTAL_MARKS and appends it to
 * `percentages`.
 *
 * The original wrote to `percentages[scores]`, i.e. it used the score itself
 * as the array index (10, 40, 30, 25), which left every other slot empty.
 * Appending with push() stores the results at indices 0, 1, 2, ...
 *
 * @param {number} scores - a single raw test score.
 * @returns {number[]} the shared `percentages` array, after the append.
 */
const gradePercentage = (scores) => {
  percentages.push((scores / TOTAL_MARKS) * 100);
  return percentages;
};

for (const score of testScore) {
  gradePercentage(score);
}
console.log(percentages);
I got the calculations right, but when trying to store it in another array, I get "empty" slots. Why?
This is what the console displayed:

(how) can I tell my JavaScript to append a variable/column to the data Qualtrics exports?

I've implemented a task in Qualtrics that randomly selects a number between 0 and 3, and then selects a corresponding word pool to sample 5 words from. To be able to analyze these data, though, I need to know which 5 words (or at minimum, the index number or name of the word pool being sampled from) is presented to each respondent. Is there a way to implement the recording of this information within JavaScript? Ideally this information would show up when I use Qualtrics' native "export" options, but if I have to somehow create a second spreadsheet with this treatment data, that works just fine as well.
Qualtrics.SurveyEngine.addOnload(function()
{
    // number of words shown to each respondent
    const WORDS_PER_RESPONDENT = 5;

    // first, create four arrays for the four word pools used in task
    // (left empty here: fill each one with the words of that pool)
    const wordpool1 = [];
    const wordpool2 = [];
    const wordpool3 = [];
    const wordpool4 = [];

    // assemble word list arrays into one array, with index 0-3
    const masterwordlist = [wordpool1, wordpool2, wordpool3, wordpool4];

    // function that randomly chooses an integer between min and max (both inclusive)
    function randomInteger(min, max) {
        return Math.floor(Math.random() * (max - min + 1)) + min;
    }

    // function that shuffles (randomizes) a word list array in place (Fisher-Yates shuffle)
    function shuffle(target) {
        for (let i = target.length - 1; i > 0; i--) {
            const j = Math.floor(Math.random() * (i + 1));
            const temp = target[i];
            target[i] = target[j];
            target[j] = temp;
        }
        return target;
    }

    // function that chooses 5 words from a word list array, returns those 5 words as array.
    // A copy is shuffled so the pool itself keeps its original order.
    function pickWords(target) {
        return shuffle(target.slice()).slice(0, WORDS_PER_RESPONDENT);
    }

    // top-level function: picks a pool at random and samples words from it.
    // Returns the pool index together with the words so both can be recorded.
    function genWords(masterlist) {
        const poolIndex = randomInteger(0, masterlist.length - 1);
        return { poolIndex: poolIndex, words: pickWords(masterlist[poolIndex]) };
    }

    // actually running the function (declared locally; the original assignment
    // without a declaration created an implicit global)
    const selection = genWords(masterwordlist);

    // save final output as embedded qualtrics data.
    // Both field names must also be declared as embedded data in the Survey
    // Flow, otherwise they do not appear in the exported data set.
    // NOTE(review): the words are joined explicitly so the stored text format
    // is visible here; "," matches JavaScript's default array-to-string
    // conversion. Confirm this is the format wanted in the export.
    Qualtrics.SurveyEngine.setEmbeddedData("randomwords", selection.words.join(","));
    Qualtrics.SurveyEngine.setEmbeddedData("wordpoolindex", selection.poolIndex);
});
Is there a way I can have this code record (within Qualtrics or otherwise) which values var x or var randomwords take on?
EDIT: I found another answer on here which may be relevant. According to this answer, though, it looks like I have all the code needed to record my variable selection; do I simply need to set embedded data within the survey flow, as well?
See here: Is it possible to save a variable from javascript to the qualtrics dataset?
Yes, you need to define the embedded data field randomwords in the survey flow.

Bellman's equation's loss in TFjs

I am trying to implement a simple Pong Q-Network in JS using p5 and tfjs.
To train the network, I first created a custom loss function where I passed a label tensor only containing the labels for the actions a_t
TFjs didn't really seem to like the fact that the shape was different from the predictions from the model, so I crafted another custom loss function where the label input is a tensor of shape [batchSize, 3] (3 actions: up, down and none) where each element is in the form [0,0,y_j] or [0,y_j,0] or [y_j,0,0] (y_j at the place I am supposed to compare to the predictions tensor and 0 elsewhere). Here it is:
/**
 * Sum of squared errors between predictions and labels, counted only at the
 * positions where the label is non-zero (the action that was actually taken).
 *
 * The result is built purely from tensor operations on `predictions`, so a
 * gradient path exists from the model weights to the loss. Reading values out
 * through buffers and wrapping a plain number in a new tensor breaks that path
 * and triggers "Cannot find a connection between any variable and the result
 * of the loss function". The inputs are also no longer disposed here: they are
 * handed in by the caller and are still needed for the backward pass.
 *
 * Assumes each label row holds at most one non-zero entry, as described for
 * this loss; with several non-zero entries in a row, all of them are counted.
 *
 * @param predictions - model output, same shape as `labels`.
 * @param labels - target values, zero wherever no target applies.
 * @returns scalar tensor holding the summed squared error.
 */
function bellmanLoss(predictions, labels)
{
    // 1 where a label is present, 0 elsewhere
    const mask = tf.cast(tf.cast(labels, 'bool'), 'float32');
    // zero the predictions at unlabeled positions: labels are already zero
    // there, so those positions contribute nothing to the difference
    const maskedPredictions = predictions.mul(mask);
    return tf.squaredDifference(maskedPredictions, labels).sum();
}
But here I am crafting a tensor from scratch and so I get this error using model.fit: "Error: Cannot find a connection between any variable and the result of the loss function y=f(x). Please make sure the operations that use variables are inside the function f passed to minimize()".
Is there a way to have a loss compatible with model.fit or do I have to tune the model weights manually (which would be a pain)?
Update: I've made some changes to be more "tensory" and it seems to be going in the right direction:
/**
 * Mean squared difference between labels and predictions, where predictions
 * are replaced by zero at every position whose label converts to false.
 *
 * @param preds - model predictions.
 * @param labels - target values, zero where no target applies.
 * @returns tensor holding the mean of the squared differences.
 */
function bellmanLoss(preds, labels)
{
    const hasLabel = tf.cast(labels, 'bool');
    const blank = tf.zerosLike(preds);
    // select the prediction where a label exists, zero otherwise
    const keptPreds = preds.where(hasLabel, blank);
    const squaredError = tf.squaredDifference(keptPreds, labels);
    return squaredError.mean();
}
However, I still need to find another alternative because
"Error: Cannot compute gradient: gradient function not found for where"
Final update: I've found a way without using tf.where and it works
/**
 * Mean squared difference between labels and masked predictions. The mask is
 * the labels cast to bool and back to float32, and the predictions are
 * multiplied by it instead of being selected with `where`.
 *
 * @param preds - model predictions.
 * @param labels - target values, zero where no target applies.
 * @returns tensor holding the mean of the squared differences.
 */
function bellmanLoss(preds, labels)
{
    // bool round-trip, then back to float32 so it can be multiplied
    const keepMask = tf.cast(tf.cast(labels, 'bool'), 'float32');
    const keptPreds = preds.mul(keepMask);
    const squaredError = tf.squaredDifference(keptPreds, labels);
    return squaredError.mean();
}

How do you measure the difference between two sounds using the Web Audio API?

I'm attempting to measure the difference between two sounds using an analyser node and getByteFrequencyData(). I thought that by summing the difference in each frequency bin I could come up with a single number to represent how different the two sounds were. Then I would be able to change the sounds and measure the numbers again to see if the new sound was more or less different than before.
Does getByteFrequencyData() fully encompass the representation of a sound or do I need to include other pieces of data to qualify the sound?
Here is the code I'm using:
var Spectrogram = (function(){
    var FFT_SIZE = 2048;
    // Value stored as `sampleRate` and passed to createScriptProcessor as its first argument.
    var PROCESSOR_SIZE = 512;

    /**
     * Wires an analyser node into a script processor node created from `ctx`
     * and prepares an empty list of captured frequency frames.
     * `startNode` / `endNode` expose the two ends of that small chain.
     */
    function Spectrogram(ctx) {
        var analyser = ctx.createAnalyser();
        analyser.fftSize = FFT_SIZE;
        this.analyser = analyser;

        this.sampleRate = PROCESSOR_SIZE;

        var scriptNode = ctx.createScriptProcessor(this.sampleRate, 1, 1);
        scriptNode.onaudioprocess = this.process.bind(this);
        this.scriptNode = scriptNode;

        analyser.connect(scriptNode);

        this.startNode = analyser;
        this.endNode = scriptNode;
        this.data = [];
    }

    // Copies the first `count` samples of `from` into `to`.
    function copySamples(from, to, count) {
        var position = 0;
        while (position < count) {
            to[position] = from[position];
            position += 1;
        }
    }

    /**
     * Audio-process callback: stores one frame of byte frequency data, then
     * copies the input samples to the output for every output channel.
     */
    Spectrogram.prototype.process = function(e) {
        var frame = new Uint8Array(this.analyser.frequencyBinCount);
        this.analyser.getByteFrequencyData(frame);
        this.data.push(frame);

        var source = e.inputBuffer;
        var sink = e.outputBuffer;
        for (var ch = 0; ch < sink.numberOfChannels; ch += 1) {
            copySamples(source.getChannelData(ch), sink.getChannelData(ch), source.length);
        }
    };

    /**
     * Sums the absolute per-bin differences between this recording and
     * `other`, frame by frame. Frames missing from `other` are skipped.
     */
    Spectrogram.prototype.compare = function(other) {
        var mine = this.data;
        var theirs = other.data;
        var total = 0;
        for (var frame = 0; frame < mine.length; frame += 1) {
            if (!theirs[frame]) {
                continue;
            }
            for (var bin = 0; bin < mine[frame].length; bin += 1) {
                total += Math.abs(mine[frame][bin] - theirs[frame][bin]);
            }
        }
        return total;
    };

    return Spectrogram;
})();
You could use the spectralFlux function provided by the Meyda package to compare two signals. Spectral flux is, according to wikipedia, "usually calculated as the 2-norm (also known as the Euclidean distance) between the two normalised spectra."
After running npm install --save meyda, you would do something like:
// Load only the spectral flux extractor from the meyda package.
const spectralFlux = require('meyda/src/extractors/spectralFlux');
// NOTE: `[your first signal]` and `[your second signal]` below are placeholders,
// not valid JavaScript; replace each with the data of one of the two signals
// being compared before running this.
const difference = spectralFlux({
signal: [your first signal],
previousSignal: [your second signal]
});
Feel free to just copy the code from here so that you don't have to handle the dependency, the codebase is appropriately licensed.
It will return a coefficient of how "different" the two signals sound. You could do this in either the time domain, or the frequency domain. You'd get different numbers, but both would correlate with how "different" the sounds are from each other.
But "difference" may not describe the differences accurately enough for your use case. For instance, you may care a lot about volume differences, and not very much about timbral differences, but the spectral flux metric doesn't take that into account. You may wish to run each signal through feature extractors first, find other statistics about their properties such as their perceptual volume, their brightness, etc, and then take a weighted euclidean distance between those data, which would provide a more tailored "difference" metric to what you need for your purpose.
Happy to elaborate further, but this is already pretty long for a SO answer.

Generating random unique data takes too long and eats 100% CPU

WARNING: CPU Usage goes to 100%, be careful.
Link to the jsFiddle
This script has been written to design a dynamic snakes and ladders board. Every time the page is refreshed, a new board is created. Most of the time not all of the background images appear, and the CPU usage goes up to 100%. But on occasion all of them appear and the CPU usage is normal.
Opera shows some of the background images, Firefox lags and asks me if I wish to stop the script.
I believe that the problem is with these lines of code:
// Fragment of a larger function: `origin`, `random_1`, `random_2` and `valFlag`
// are defined outside this excerpt. Walks every enumerable key of `origin` and
// sets valFlag to 1 when a stored value equals either candidate number, or when
// a key equals random_2.
// NOTE(review): presumably `origin` maps a start square (key) to its end square
// (value) - confirm against the full script.
// for...in yields keys as strings, so `key == random_2` relies on loose equality
// to match a numeric random_2; switching to === would change the result.
for(var key in origin) // Need to implement check to ensure that two keys do not have the same VALUES!
{
if(origin[key] == random_1 || origin[key] == random_2 || key == random_2) // End points cannot be the same AND starting and end points cannot be the same.
{
valFlag = 1;
}
// Debug output: logs every key visited, on every pass over `origin`.
console.log(key);
}
Your algorithm is very inefficient. When the array is almost filled up, you literally do millions of useless iterations until you get lucky and the RNG happens to pick a missing number. Rewrite it to:
Generate an array of all possible numbers - from 1 to 99.
When you need a random number, generate a random index within the current bounds of this array, splice out the element at that random position (removing it from the array), and use its value as your desired random number.
If the generated numbers don't fit some of your conditions (minDiff?), return them to the array. Do note that you can still stall in the loop forever if nothing that is left in the array is able to fit your conditions.
Every value you pull from array in this way is guaranteed to be unique, since you originally filled it with unique numbers and remove them on use.
I've stripped drawing and placed generated numbers into array that you can check in console. Put your drawing back and it should work - numbers are generated instantly now:
// Image paths available for snakes; one start/end pair is generated per entry.
var snakes = [
    './Images/Snakes/snake1.png',
    './Images/Snakes/snake2.jpg',
    './Images/Snakes/snake3.gif',
    './Images/Snakes/snake4.gif',
    './Images/Snakes/snake5.gif',
    './Images/Snakes/snake6.jpg'
];
// Image paths available for ladders; one start/end pair is generated per entry.
var ladders = [
    './Images/Ladders/ladder1.jpg',
    './Images/Ladders/ladder2.jpg',
    './Images/Ladders/ladder3.png',
    './Images/Ladders/ladder4.jpg',
    './Images/Ladders/ladder5.png'
];
// Generates the start/end numbers for the board pieces: snakes first
// (flag 0), then ladders (flag 1).
function drawTable()
{
    var jobs = [
        [snakes, 0],
        [ladders, 1]
    ];
    for (var j = 0; j < jobs.length; j += 1) {
        generateRand(jobs[j][0], jobs[j][1]);
    }
}
// Pool of the numbers 1..99 that have not been handed out yet.
var uniqNumbers = [];
var idx = 1;
while (idx < 100) {
    uniqNumbers.push(idx);
    idx += 1;
}
// Generated pairs are collected here.
var results = [];
/**
 * Draws one pair of distinct, unused numbers from the shared `uniqNumbers`
 * pool for every entry of `arr` and appends it to the shared `results` array.
 * The two numbers of a pair are at least `minDiff` apart.
 *
 * @param {Array} arr - items to generate a pair for; only its length is used.
 * @param {number} flag - 0 for snakes (pair stored as [higher, lower]),
 *   anything else for ladders (pair stored as [lower, higher]).
 * @throws {Error} when the pool can no longer provide a valid pair. Without
 *   this check the retry loop would spin forever (all remaining numbers closer
 *   than minDiff) or hand out `undefined` (fewer than two numbers left).
 */
function generateRand(arr, flag)
{
    var minDiff = 8; // Minimum difference between start of snake/ladder to its end.

    // Removes one randomly chosen number from the pool and returns it.
    function takeRandom() {
        // floor explicitly rather than relying on splice truncating a fraction
        var position = Math.floor(Math.random() * uniqNumbers.length);
        return uniqNumbers.splice(position, 1)[0];
    }

    // True when the pool still holds two numbers at least minDiff apart.
    // Checking the extremes is enough: if min and max are too close, so is
    // every other pair.
    function pairIsPossible() {
        if (uniqNumbers.length < 2) {
            return false;
        }
        var lowest = Math.min.apply(null, uniqNumbers);
        var highest = Math.max.apply(null, uniqNumbers);
        return highest - lowest >= minDiff;
    }

    for (var i = 0; i < arr.length; ++i) {
        if (!pairIsPossible()) {
            throw new Error('generateRand: not enough unused numbers left to build a pair at least ' + minDiff + ' apart');
        }

        var first;
        var second;
        var valid = false;
        // A valid pair is known to exist, so this retry loop ends with probability 1.
        do {
            first = takeRandom();
            second = takeRandom();
            if (Math.abs(first - second) < minDiff) {
                // too close together: return both numbers to the pool and retry
                uniqNumbers.push(first);
                uniqNumbers.push(second);
            } else {
                valid = true;
            }
        } while (!valid);

        var lower = Math.min(first, second);
        var higher = Math.max(first, second);
        // Loose equality kept on purpose so existing callers passing '0' behave as before.
        if (flag == 0) {
            // Snake: starts at the higher number, ends at the lower one.
            results.push([higher, lower]);
        } else {
            // Ladder: starts at the lower number, ends at the higher one.
            results.push([lower, higher]);
        }
    }
}
// Run the generation once; the generated pairs can then be inspected in `results`.
drawTable()
I had a problem like this using "HighCharts" in a for loop. Browsers have built-in functionality to detect dead scripts or infinite loops, so the browser halts or pops up a message saying the script is not responding. Not sure if you have that symptom!
This resulted from a loop over a large pool of data. I wrote a tutorial on it on CodeProject; you might try it, and it might be your answer.
http://www.codeproject.com/Tips/406739/Preventing-Stop-running-this-script-in-Browsers

Categories