I'm trying to create a data generator, which I verified was working by itself in pure js. TFJS documentation for it is here, with two examples:
https://js.tensorflow.org/api/latest/#data.generator
I'd like to use a tf.data.generator as this dataset requires elaborate preprocessing. A minimal example is as follows:
// TensorFlow.js Node binding. Scoped npm packages are prefixed with "@", not "#".
const tf = require('@tensorflow/tfjs-node');
/**
 * Wraps an array so its items can be streamed through a generator method.
 */
class dataGeneratorGenerator {
  /**
   * @param {Array} test - Items to stream, in order.
   */
  constructor(test) {
    this.test = test;
  }

  /**
   * Yields every item of `this.test`, from the first to the last.
   * Reads `this.test`, so it has to be invoked on an instance.
   *
   * @yields {*} The next item of `this.test`.
   */
  * dataGenerator() {
    const len = this.test.length;
    let idx = 0;
    while (idx < len) {
      console.log(idx);
      // Yield first, then advance: incrementing before the yield skips
      // element 0 and ends with an out-of-range `undefined`.
      yield this.test[idx];
      idx++;
    }
  }
}
// Build a dataset from the instance's generator method and print each element.
// The method is handed over as a bare reference, i.e. it is not bound to `dgg`.
const dgg = new dataGeneratorGenerator(['hi', 'hi2', 'hi3']);
const trainDs = tf.data.generator(dgg.dataGenerator);
trainDs.forEachAsync((element) => console.log(element));
The error is as follows:
TypeError: Error thrown while iterating through a dataset: Cannot read property 'test' of undefined
Iterating through our datagenerator in pure javascript works:
// Drive the generator by hand. next() lives on the generator object returned
// by dataGenerator(), not on the dataGeneratorGenerator instance itself.
let dgg = new dataGeneratorGenerator(['hi', 'hi2', 'hi3']);
let dg = dgg.dataGenerator();
console.log(dg.next());
console.log(dg.next());
console.log(dg.next());
My understanding is that we are only passing dataGenerator into tf.data.generator instead of the entire class. Then, how is it possible to input variables into tf.data.generator? Thanks.
One can simply use an arrow function.
// TensorFlow.js Node binding. Scoped npm packages are prefixed with "@", not "#".
const tf = require('@tensorflow/tfjs-node');
/**
 * Generator that yields each element of `test` in order.
 *
 * @param {Array} test - Items to emit.
 * @yields {*} The next element of `test`.
 */
function* dataGenerator(test) {
  const len = test.length;
  let idx = 0;
  while (idx < len) {
    console.log(idx);
    // Without this yield the generator finishes without producing anything.
    yield test[idx];
    idx++;
  }
}
// The arrow function closes over the input, so the generator can take
// arguments while tf.data.generator still receives a zero-argument factory.
const samples = ['hi', 'hi2', 'hi3'];
const trainDs = tf.data.generator(() => dataGenerator(samples));
trainDs.forEachAsync((element) => console.log(element));
Related
I am using around 60000 + 10000 Images for creating the training and validation dataset using a generator
( Images used are the MNIST Images of Handwritten Digits in the PNG Format ).
But when I fit the model using them, "Cleanup called" message logs are constantly getting printed.
/**
 * Generator yielding one `{ xs, ys }` sample per file found under
 * `MNIST/<type>-Data/<label>/`.
 *
 * `xs` is the result of `tf.node.decodePng(file, 1)`; `ys` is a 1-D tensor
 * built from an array of `labels.length` zeros with one slot set to 1.
 *
 * @param {string} type - Prefix of the data directory (`<type>-Data`).
 */
function* dataGenerator(type) {
  const dataRoot = `MNIST/${type}-Data`;
  const labels = fs.readdirSync(dataRoot);
  for (const label of labels) {
    const labelDir = `${dataRoot}/${label}`;
    for (const fileName of fs.readdirSync(labelDir)) {
      const pngBytes = fs.readFileSync(`${labelDir}/${fileName}`);
      const imageTensor = tf.node.decodePng(pngBytes, 1);
      const oneHotArr = new Array(labels.length).fill(0);
      // NOTE(review): the slot is addressed by the directory name itself, so
      // this presumably expects label directories named "0", "1", ... - confirm.
      oneHotArr[label] = 1;
      yield { xs: imageTensor, ys: tf.tensor1d(oneHotArr) };
    }
  }
}
// Both splits use the same pipeline: generator -> shuffle(100) -> batch(100).
const toBatchedDataset = (type) =>
  tf.data.generator(() => dataGenerator(type))
    .shuffle(100)
    .batch(100);

const trainingDataset = toBatchedDataset("Training");
const validationDataset = toBatchedDataset("Validation");

// Fitting the model
const fitOptions = {
  epochs: 5,
  validationData: validationDataset
};
await model.fitDataset(trainingDataset, fitOptions);
What am I doing wrong ?
Nothing. The message is informational only and comes from the underlying TensorFlow C library used by tfjs-node (where it was introduced at the wrong log level).
The planned upgrade of the shared library from 2.7.3 to 2.9.1 will take care of that; it should be part of tfjs 3.21 once it's released.
My understanding of the revealing module pattern is as follows:
CODE #1
// Revealing module built with an IIFE: the function runs exactly once and
// only the returned object is reachable from outside.
const iifeModulePattern = (function () {
  const secretSeed = 999; // ← private

  function logSecretCode() {
    console.log(secretSeed + 1);
  }

  return { methodForPublic: logSecretCode };
})();

iifeModulePattern.methodForPublic();
So far I hope I'm not wrong. My question is:
Won't the following Code #2 serve same purpose?
If it does, why is Code #1 more popular than Code #2?
If it doesn't, what's the difference?
CODE #2
// Same body as the IIFE version, but kept as a callable function:
// every call builds a fresh closure and a fresh public object.
const modulePattern = () => {
  const secretSeed = 999; // ← private

  function logSecretCode() {
    console.log(secretSeed + 1);
  }

  return { methodForPublic: logSecretCode };
};

modulePattern().methodForPublic();
I won't store "secret" codes (like passwords) in this way. The above codes are just examples.
// Singleton: the IIFE runs once, so there is exactly one private `value`.
const iifeModulePattern = (function() {
  let value = 0; // private
  return {
    // One step of the 32-bit LCG x -> (134775813 * x + 1) mod 2^32.
    // Math.imul keeps the product exact modulo 2^32, and the wrapped result is
    // what gets stored. Storing the unwrapped product lets the state grow
    // without bound, lose precision, and return 0 after a handful of calls.
    next: () => (value = (Math.imul(134775813, value) + 1) >>> 0)
  };
})();

for (let i = 0; i < 5; ++i) {
  console.log("iifeModulePattern.next()", i, iifeModulePattern.next());
}
console.log("The basic IIFE.");
console.log("");
// Factory: every call creates an independent generator with its own `value`.
const modulePattern = () => {
  let value = 0; // private
  return {
    // One step of the 32-bit LCG x -> (134775813 * x + 1) mod 2^32.
    // Math.imul keeps the product exact modulo 2^32, and the wrapped result is
    // what gets stored. Storing the unwrapped product lets the state grow
    // without bound, lose precision, and return 0 after a handful of calls.
    next: () => (value = (Math.imul(134775813, value) + 1) >>> 0)
  };
};

for (let i = 0; i < 5; ++i) {
  console.log("modulePattern().next()", i, modulePattern().next());
}
console.log("Kinda pointless this way, ain't it? Everytime, the sequence starts all over.");
console.log("");
// Two generators from the same factory; the second one is advanced only on
// odd steps so the output shows that each keeps its own state.
const instance1 = modulePattern();
const instance2 = modulePattern();

for (let step = 0; step < 10; step += 1) {
  console.log("instance1.next()", step, instance1.next());

  const isOddStep = step % 2 === 1;
  if (isOddStep) {
    console.log("instance2.next()", step, instance2.next());
  }
}

console.log("This usage makes more sense.\nAnd the two instances progress independant of each other.");
/* Pin the console wrapper to the top edge and let it use the full height. */
.as-console-wrapper {
  top: 0;
  max-height: 100% !important;
}
Won't the following Code #2 serve same purpose?
Yes, No, maybe; it depends on how you use it.
If it doesn't, what's the difference?
iifeModulePattern is a singleton, modulePattern() is a factory.
If it does, why is Code #1 more popular than Code #2?
What's the purpose of giving the factory a name and storing it in a variable if all you'll ever do is call it once, right here, right now?
I have to generate an alphanumeric value. My function is shown below.
* My doubt: in this function there is a key called category_no, and I have to auto-increment this key.
Expected format: min value C0001, then C0002, C0003, ..., max value C9999.
// Inserting New Category
/**
 * Inserts one default category document per entry of `categoryJson`.
 *
 * Each document gets a sequential `category_no` ("C0001", "C0002", ...,
 * at most "C9999"), placeholder overview/description texts and creation and
 * modification timestamps. The document is validated before it is inserted.
 *
 * NOTE(review): numbering restarts at C0001 on every call. If the collection
 * can already hold categories, seed the counter from the highest stored
 * category_no instead.
 *
 * @returns {Promise<void>}
 * @throws {RangeError} If more than 9999 categories would have to be numbered.
 * @throws Whatever `error` the validator reports for a document.
 */
async function postCategory() {
  const MAX_CATEGORY_NO = 9999;
  if (categoryJson.length > MAX_CATEGORY_NO) {
    throw new RangeError(`Cannot number more than ${MAX_CATEGORY_NO} categories`);
  }

  // NOTE(review): connected once instead of once per document; this assumes
  // the returned handle can be reused for several inserts - confirm.
  const mongodb = await MongoDB.connect("ecomm_prod_db_category");

  for (let i = 0; i < categoryJson.length; i++) {
    const now = new Date();
    const categoryDefault = {
      // "C" followed by the 1-based position, zero-padded to four digits.
      category_no: `C${String(i + 1).padStart(4, '0')}`,
      category_overview: "No overview",
      category_description: "No description",
      category_created_date: now,
      category_modified_date: new Date(now),
    };

    const { error } = await CategoryModel.validateNewCategory(categoryDefault);
    if (error) {
      // Do not store a document the validator rejected.
      throw error;
    }

    await mongodb.insertOne(categoryDefault);
  }
}
You can create a function like the following to generate category_no.
Use String.prototype.padStart to left-pad the number with zeros to 4 digits, so that the string (including the "C" prefix) is always 5 characters long.
For the counter, use an anonymous self-invoking function that keeps the count in its closure.
/**
 * Returns the next category number on every call: "C0001", "C0002", ...,
 * up to and including "C9999". The counter is private to the closure.
 *
 * @returns {string} "C" followed by the zero-padded four-digit counter.
 * @throws {Error} 'Max Limit Reached' once "C9999" has been handed out.
 */
const catagory = (() => {
  // Declared locally: a bare `count = 1` creates an implicit global and is a
  // ReferenceError in strict mode / ES modules.
  let count = 1;
  return () => {
    if (count > 9999) {
      throw new Error('Max Limit Reached');
    }
    const categoryNo = `C${String(count).padStart(4, '0')}`;
    count += 1;
    return categoryNo;
  };
})();

console.log(catagory());
console.log(catagory());
console.log(catagory());
Problem
I'm trying to implement some sort of "fuzzy search" in my Node.js based project.
Fuzzy search is a search that returns results even if the string didn't match exactly.
I found this code in another stackoverflow thread. Code is below.
It's quite good, but the problem is that it's synchronous: it slows down the whole program when it searches through a large array.
Question
ES6 methods are welcomed. Needs to work only in the latest Chrome, so any JS methods will work.
Are there any new JS methods that would optimize this function?
Am I doing something wrong there that makes it even slower?
Can I turn this function into an async function that returns a promise? Will it stop freezing the app during the search then?
Are there any better "fuzzy search" implementations you know of? (I found a module called fuzzysort, can't say if it's that much better though, it won't return "folder test" if you type "test folder" (wrong order) so it's not that good)
Code
Calling search function
searchArray is an array of paths it searches through, e.g.: ["C:\\test", "C:\\file.txt"...] (0.5 - 5 million paths)
searchQuery is a string without spaces, e.g.: filetxt
search () {
const fuzzySearch = this.fuzzySearch(this.searchQuery.toLowerCase(), this.searchArray)
let result = fuzzySearch.filter(element => element.relevance >= 0.3)
// sort by relevance
var sortedResults = result.sort((a, b) => parseFloat(b.relevance) - parseFloat(a.relevance)).map(item => item.name);
this.searchResults = sortedResults
},
The fuzzy search function
fuzzySearch (searchQuery, searchArray) {
const get_bigrams = function(string) {
const s = string.toLowerCase();
const v = new Array(s.length - 1);
for (let i = 0, end = v.length; i <= end; i++) {
v[i] = s.slice(i, i + 2);
}
return v;
};
const string_similarity = function(str1, str2) {
if ((str1.length > 0) && (str2.length > 0)) {
const pairs1 = get_bigrams(str1);
const pairs2 = get_bigrams(str2);
const union = pairs1.length + pairs2.length;
let hit_count = 0;
for (let x of Array.from(pairs1)) {
for (let y of Array.from(pairs2)) {
if (x === y) {
hit_count++;
}
}
}
if (hit_count > 0) {
return ((2.0 * hit_count) / union);
}
}
return 0.0;
};
let results = [];
for (let name of searchArray) {
// I added .match to use only the base filename (name+ext) not the whole path, and removed all characters
let filteredPath = name.match(/[^\\\/]+$/)[0].replace(/[^A-Za-z0-9.]+/g, '')
const relevance = string_similarity(searchQuery, filteredPath);
const obj = {name, relevance};
results.push(obj);
}
return results
},
I have the following code
var utils = require(`${__dirname}/../../utils/utils.js`);
...
// Look the input up directly; on a miss, tell the user which known name is
// closest and carry on with that suggestion.
let object = utils.parse(input);
if (object === undefined) {
  const suggestion = utils.recognize(input)[0];
  msg.channel.sendMessage(`"${input}" not recognized. Did you mean "${suggestion}"?`);
  object = utils.parse(suggestion);
}
//code related to object
console.log(object.strLength);
where "parse" tries to match the input to an object in a database, and "recognize" tries to find the best match if the input is spelled incorrectly (Levenshtein) (along with additional info such as how close the match was).
Currently the issue is that the code is run asynchronously; "object.strLength" returns undefined before utils.recognize() returns a value. If I copy/paste the recognize() and parse() functions into the file, then the code is run synchronously and I do not run into any issues. However, I would rather keep those functions in a separate file, as I reuse them in other files.
Is there a way to specify that the functions in utils must be synchronous? I know that there are libraries that convert async code into sync code, but I prefer to use as few libraries as I can. I tried to have the recognize function return a Promise, but it ended up as a jumbled mess.
edit: here's parse. I did not think it was necessary to answer this question so I did not include it initially:
var db = require(`${__dirname}/../data/database.js`);
...
/**
 * Resolves user input to a database entry.
 *
 * Lookup order:
 *   1. `input` is itself a key of `db`;
 *   2. otherwise the first entry whose `num` equals the input parsed as a
 *      base-10 integer, or whose `color + type` equals the input.
 *
 * @param {string} input - Key, number or color+type text to resolve.
 * @returns {object|undefined} The matching entry, or undefined when nothing matches.
 */
var parse = (input) => {
  // Own keys only, so inputs such as "toString" do not resolve to inherited members.
  if (Object.prototype.hasOwnProperty.call(db, input)) {
    return db[input];
  }

  const asNumber = Number.parseInt(input, 10);
  // find() stops at the first hit; a `return false` inside forEach does not end the loop.
  return Object.values(db).find(
    (entry) => entry.num === asNumber || entry.color + entry.type === input
  );
};
I solved the issue, thanks everyone. Here's what was wrong, it was with recognize(). It was my mistake to not show the code for it initially.
Original recognize:
// Finds the key of `db` that is closest to `item` by Levenshtein distance.
// Returns [bestKey, distance]; the first key with the smallest distance wins,
// and [null, 99999] is returned when no key scores below 99999.
var recognize = (item) => {
  //found algorithm online by milot-mirdita
  // Single-row dynamic-programming edit distance between two strings.
  const levenshtein = (first, second) => {
    if (first.length === 0) return second.length;
    if (second.length === 0) return first.length;

    // Keep the shorter string on the row axis so the row stays small.
    const [shorter, longer] =
      first.length > second.length ? [second, first] : [first, second];
    const width = shorter.length;

    const row = Array.from({ length: width + 1 }, (_, k) => k);

    for (let i = 1; i <= longer.length; i++) {
      let left = i; // value just written for the previous column
      for (let j = 1; j <= width; j++) {
        const current =
          longer.charAt(i - 1) === shorter.charAt(j - 1)
            ? row[j - 1] // match
            : Math.min(row[j - 1], left, row[j]) + 1; // substitution / insertion / deletion
        row[j - 1] = left;
        left = current;
      }
      row[width] = left;
    }
    return row[width];
  };

  //putting this here would make the code work
  //console.log("hi");

  //iterate through our databases and get a best fit
  let bestItem = null;
  let bestScore = 99999; //arbitrary large number
  for (const key of Object.keys(db)) {
    const score = levenshtein(item, key);
    if (score < bestScore) {
      bestItem = key;
      bestScore = score;
    }
  }
  return [bestItem, bestScore];
}
My solution was to move the levenshtein function outside of the recognize function, so that I can also call levenshtein from another function if I want to.
@user949300 and @Robert Moskal, I changed the forEach loop into a for (let ... in ...) loop. There is no functional difference (as far as I can tell), but the code does look cleaner.
@Thomas, I fixed the let output = db[output]; issue, oops.
Again, thanks for all of your help, I appreciate it. And happy New Year too!