I decided to try and make a language tokenizer (don't even know if that's a real word) and made around 4 tokens that successfully tokenized a full program with line breaks and multiple spaces etc, but I just started from scratch and am running into a problem; I have two tokens currently, int and variableSet. The program being read has the content of 1 sv 1 2 as just a test, and the tokenizer returns an array of int, int, int, int with sv having a value of 1.
const code = `1 sv 1 2`
var validTokens = require("./tokens"); // just an object with the structure tokenName: RegExp object
function reverseTokenSearch(regex){
for (const [index, [key, value]] of Object.entries(Object.entries(validTokens))) {
if (value === regex){
return key;
}
}
return false;
}
function throughTokens (code,lastidx=0) {
for (const tokentype in validTokens){ // loop through all of the valid tokens
validTokens[tokentype].lastIndex = lastidx;
const searchresult = validTokens[tokentype]
const tokenresult = searchresult.exec(code.toString());
if (tokenresult) {
return [searchresult, tokenresult[0], tokenresult.index, lastidx+tokenresult[0].length+1, tokenresult.groups]
}
}
}
function resetIndexes (){
for (const tt in validTokens){
validTokens[tt].lastidx = 0;
}
}
resetIndexes();
var lst = 0
var tokens = []
var res = 1;
console.log("\ntokenizer; original input:\n"+code+"\n");
while (lst !== undefined && lst !== null){
if (lst > code.length){
console.error("Fatal error: tokenizer over-reached program length.")
process.exit(1)
}
const res = throughTokens(code,lst);
if(res){
console.log(res,lst)
const current = []
current[0] = reverseTokenSearch(res[0])
current[1] = res[1]
const currentidx = 2
for (const x in res[4]) {
current[currentidx] = x;
}
tokens.push(current)
lst = res[3]
} else {
lst = null
}
}
console.log(tokens)
// What outputs:
/*
tokenizer; original input:
1 sv 1 2
[ /\d+/g { lastidx: 0 }, '1', 0, 2, undefined ] 0
[ /\d+/g { lastidx: 0 }, '1', 5, 4, undefined ] 2
[ /\d+/g { lastidx: 0 }, '1', 5, 6, undefined ] 4
[ /\d+/g { lastidx: 0 }, '2', 7, 8, undefined ] 6
[ [ 'int', '1' ], [ 'int', '1' ], [ 'int', '1' ], [ 'int', '2' ] ]
*/
I think it's because of the order of the array but I have no idea where to start fixing it and would greatly appreciate a push in the right direction.
(edit): I tried removing the "g" flag on the RegExp object and all it did was broke the program into an infinite loop.
The problem is that you are silently assuming that every match found by the regex will start at lastidx which is not always the case. If you log tokenresult and lastidx before returning from throughTokens, you will see:
0
[ '1', index: 0, input: '1 sv 1 2', groups: undefined ]
2
[ '1', index: 5, input: '1 sv 1 2', groups: undefined ]
4
[ '1', index: 5, input: '1 sv 1 2', groups: undefined ]
6
[ '2', index: 7, input: '1 sv 1 2', groups: undefined ]
In the second iteration, the match is at index 5, but you assume it to be at index 2, which it is not (whereby you also incorrectly increment lastidx to 4). You also at the end of throughTokens assume that every match is followed by a space, which is also incorrect for the last token.
Simplest way to fix this code is to replace
//if (tokenresult) { // replace in throughTokens with below
if (tokenresult && tokenresult.index === lastidx) {
to be sure that you're matching at the right place and then in the main loop
//while (lst !== undefined && lst !== null){ // replace with below
while (lst !== undefined && lst !== null && lst < code.length){
to handle the end of the input correctly.
With these changes, the printouts that we added earlier will be
0
[ '1', index: 0, input: '1 sv 1 2', groups: undefined ]
2
[ 'sv', index: 2, input: '1 sv 1 2', groups: undefined ]
5
[ '1', index: 5, input: '1 sv 1 2', groups: undefined ]
7
[ '2', index: 7, input: '1 sv 1 2', groups: undefined ]
which is correct and the output would be
[
[ 'int', '1' ],
[ 'variableSet', 'sv' ],
[ 'int', '1' ],
[ 'int', '2' ]
]
Recommendations
There are a lot of other logical and programmatical problems with this code which I will not go into but my advice is to go through every piece of the code and understand what it does and whether it could be done in a simpler way.
On a general level instead of returning an array with data [d1, d2, d3, ...] return an object with named properties { result: d1, index: d2, ... }. Then it is much easier for someone else to understand your code. Also go through naming of methods.
As far as this approach is concerned, if you know that there will be a space after each token, then extract only the current token and send to throughToken. Then you can make that function both more efficient and robust against errors.
Related
I'm reading a sheet using the SheetJS (xlsx) extension to read from the excel file file like so:
var workbook = XLSX.readFile(req.file.path);
var sheet_name_list = workbook.SheetNames;
let sheet_json = XLSX.utils.sheet_to_json(workbook.Sheets[sheet_name_list[0]]);
When I run console.log(sheet_json) I get:
[
{
'Question*': 'Question 1',
'Number of Choices*': 2,
'Choice A*': 'Apple',
'Choice B*': 'Pineapple',
'Answer (A/B/C/D)*': 'A',
'Correct Marking*': 1,
'Negative marking*': 0
},
{
'Question*': 'Question 2',
'Number of Choices*': 3,
'Choice A*': 'Car',
'Choice B*': 'Bike',
'Choice C': 'Truck',
'Answer (A/B/C/D)*': 'C',
'Correct Marking*': 1,
'Negative marking*': 1
}
]
Now I loop over the sheet_json array and I can't validate the properties, the result is like this:
// i is the index of iterator. o is the object iterated
let sn = ["Question*", "Number of Choices*"]
let props = Object.keys(sheet_json[i]);
console.log(props); //===> false
console.log(props.findIndex(o=>o==sn[1])) //===> false
console.log(o.hasOwnProperty(sn[1])); //===> false
console.log("Number of Choices*" in o); //===> false
console.log(sn[1] in sheet_json[i]); //===> false
console.log(props.includes(sn[1].trim().toString())); //===> false
console.log(o["Number of Choices*"]); //===> undefined
console.log(o[sn[1]]!==undefined); //===> false
Why is everything showing up as undefined or false? Please help.
node -v: v13.14.0
npm -v: 6.14.4
xlsx: 0.16
Here's the excel file as well: DOWNLOAD
I tried replicating it here https://jsfiddle.net/t4ocdw67/.
sheet_json=[{...},{...}];
for(i in sheet_json)
{
let o = sheet_json[i];
let sn = ["Question*", "Number of Choices*"]
.... //more code here
}
It seems to be working fine, there might be a problem with iteration part maybe
I have two arrays of objects having structures like this:
let groups = [
{
word: 'ok',
id: 1,
},
{
word: 'hi',
id: 2,
}
]
let words = [
{
name: 'hello',
id: 1,
meaning: 'Grreeting !',
example: 'Hello how are you ?',
groupId: 1
},
{
name: 'morning',
id: 3,
meaning: 'First sign of the day !',
example: 'Good Morning ?',
groupId: 2
}
]
A group has many words. I have an empty array results[] and I will store all matching words there.
Now I want to search a keyword hi in both, groups and words array. The current approach I am following is :
First, I will map through groups array, and if I keyword hi matched with any group name,
then I will push all the words having that groupId into my results array.
Now I will map through the words array and if keyword hi matches with any word name then I will push that into results array.
Now, I want to do both these operations parallelly. Finding all the matches into words array and groups array parallelly and pushing data to results array.
Is there any way I can do this?
This can be done by first combining both arrays and then using the filter() array prototype function.
let groups = [
{
word: 'ok',
id: 1,
},
{
word: 'hi',
id: 2,
}
]
let words = [
{
name: 'hello',
id: 1,
meaning: 'Grreeting !',
example: 'Hello how are you ?',
groupId: 1
},
{
name: 'morning',
id: 3,
meaning: 'First sign of the day !',
example: 'Good Morning ?',
groupId: 2
}
];
let search = 'hi'; // get the search term from somewhere
let result = [ ...groups, ...words ] // combine arrays into one
.filter(({ word, name }) => ( // 'extract' word/name properties
word === search || name === search // return true when word or name matches searchterm
));
Little sidenote: in this solution searchterm can't be undefined, since that will always return true in the filter function.
Although you could technically do this with a single loop, doing so would be at the cost of clarity and simplicity. Basically you'd do a loop with the index going from 0 to Math.max(groups.length, words.length) and then do your check on each array:
for (let n = 0, len = Math.max(groups.length, words.length); n < len; ++n) {
if (n < groups.length) {
// Check `groups[i]`
}
if (n < words.length) {
// Check `words[i]`
}
}
That way, you have a single loop, which is about as close to "in parallel" as you're going to get without using a web worker (which would almost certainly be overkill). But again, it doesn't buy you much, and costs you clarity and simplicity.
Using async functions for the filter operations.
Not sure what you are doing that requires this, but note that they are not running multithreaded, and it's unlikely that this will be helpful or useful in solving the actual problem. Also, the order these execute in will be deterministic for your example.
const matches = async(list, key, match) => list.filter(x => x[key] === match);
let groups = [{
word: 'ok',
id: 1,
},
{
word: 'hi',
id: 2,
}
]
let words = [{
name: 'hello',
id: 1,
meaning: 'Grreeting !',
example: 'Hello how are you ?',
groupId: 1
},
{
name: 'morning',
id: 3,
meaning: 'First sign of the day !',
example: 'Good Morning ?',
groupId: 2
}
]
let matched = [];
Promise.all([
matches(groups, 'word', 'hi').then(m => {
matched = matched.concat(m);
console.log('groups matched:', matched);
}),
matches(words, 'name', 'hello').then(m => {
matched = matched.concat(m);
console.log('words matched:', matched);
})
]).then(res => {
console.log("matched:", matched);
console.log(".all results:", [...res[0], ...res[1]]);
})
console.log("matched immediate:", matched);
setTimeout(() => console.log("matched event loop delayed:", matched), 0);
My if/else statement goes straight to else and I cant seem to figure out why. Here is my code:
var sentiment = require ('sentiment');
var twitterSentiment;
var geoColor;
var results;
var twit = new twitter({
consumer_key: credentials.consumer_key,
consumer_secret: credentials.consumer_secret,
access_token_key: credentials.access_token_key,
access_token_secret: credentials.access_token_secret
});
twit.stream(
'statuses/filter',
{ 'locations': location },
function(stream) {
stream.on('data', function(tweet) {
console.log(tweet.text);
results = sentiment (tweet.text);
twitterSentiment = results;
//Comparison of Sentiment Scores
if (twitterSentiment == 0) {
geoColor = '#B5B5B5';
}
else if (twitterSentiment < 0) {
geoColor = '#FC0828';
}
else {
geoColor = '#00DE1E';
}
console.log (geoColor);
});
});
This is an example output:
omg yes!!
#00DE1E
Do you think will actually understand? I want to ask mine the same question. Just not sure I'm ready to have that conversation.
#00DE1E
A thing of beauty by:
#youtube
#00DE1E
do you still do this??
#00DE1E
As you can see all the tweets are being identified by only one color; almost as if my if/else statement is not implemented correctly?
When I change console.log (geoColor); to console.log (results); This is my output:
omg yes!!
{ score: 1,
comparative: 0.25,
tokens: [ 'omg', 'yes' ],
words: [ 'yes' ],
positive: [ 'yes' ],
negative: [] }
Do you think will actually understand? I want to ask mine the same question. Just not sure I'm ready to have that conversation.
{ score: 1,
comparative: 0.041666666666666664,
tokens:
[
'do',
'you',
'think',
'will',
'actually',
'understand',
'i',
'want',
'to',
'ask',
'mine',
'the',
'same',
'question',
'just',
'not',
'sure',
'i\'m',
'ready',
'to',
'have',
'that',
'conversation' ],
words: [ 'want' ],
positive: [ 'want' ],
negative: [] }
A thing of beauty by:
#youtube
{ score: 3,
comparative: 0.25,
tokens:
[ 'a',
'thing',
'of',
'beauty',
'by',
'youtube', ],
words: [ 'beauty' ],
positive: [ 'beauty' ],
negative: [] }
do you still do this??
{ score: 0,
comparative: 0,
tokens:
[
'do',
'you',
'still',
'do',
'this' ],
words: [],
positive: [],
negative: [] }
As you can see each tweet has their individual sentiment score of respectively 1,1,3,0 So why is my if/else statement disregarding those numbers?
What can I change in my code so that my if/else statement correctly implements and considers the sentiment score of the tweets? My goal is to output the appropriate color for each tweet.
Your var results is an object, that contains many other attributes. In javascript the "if" clause will always return "true" for non-null object instance. When comparing an object to a number - any instance of a non-primitive object will return 1.
As you said, you want to compare the value of your score attribute, so what you need to do is reference your results.score in your if clause.
Changing to
twitterSentiment = results.score;
Should fix your issue.
You are setting twitterSentiment to the result object and comparing the whole object instead of just the score. Change your code to:
if (twitterSentiment.score == 0) {
geoColor = '#B5B5B5';
}
else if (twitterSentiment.score < 0) {
geoColor = '#FC0828';
}
else {
geoColor = '#00DE1E';
}
I have this array:
var myArray = [
{ familyName: 'one', subfamilies:
[ { subfamilyName: 'subOne', subItems:
[ { name: 'subOne', code: '1' },
{ name: 'subTwo', code: '2' }
] }
]
},
{ familyName: 'two', subfamilies:
[ { subfamilyName: 'subTwo', subItems:
[ { name: 'subOne', code: '1' },
{ name: 'subTwo', code: '2' },
{ name: 'subTwo', code: '3' }
] }
]
}
]
I need to divide that array in two diferent arrays with the same length if possible (my real array is so much longer), but I am having some problems getting it done. I create 2 blank array and, with for sentence read all the items. First I push the subItems in one blank array, but cannot get the way to create a new subFamily in a blank array variable and then push the sutItems array.
How can be this done?
Thanks a lot in advance.
var myOtherArray = myArray.splice(myArray.length / 2 | 0);
I have the following array of arrays:
[ 'markdown',
[ 'para', '"one"' ],
[ 'hr' ],
[ 'para', '"two"' ],
[ 'para', '"three"' ] ]
I made a loop to only match those arrays with double quotes:
for (i = 1; i < jsonml.length; i++) {
if (typeof jsonml[i][1] === "string" && jsonml[i][1].match(/"/g)) {
var p = jsonml[i]
console.log(p)
}
// more code
if (jsonml[i][0] === 'hr') {
var hr = jsonml[i]
var p = jsonml[i + 1]
hr.splice(0, 1, 'para', '* * *')
p.splice(1, 0, {'class': 'noind'})
}
For some reason console.log(p) only outputs:
[ 'para', '"one"' ]
[ 'para', '"three"' ]
Is [ 'hr' ] somehow making [ 'para', '"two"' ] not to match the if statement? If so how to fix it so that [ 'para', '"two"' ] is matched, too?
if (jsonml[i][0] === 'hr') {
var hr = jsonml[i]
var p = jsonml[i + 1]
hr.splice(0, 1, 'para', '* * *')
p.splice(1, 0, {'class': 'noind'})
}
EDIT:
I think the problem is with the if statement below, since console.log(jsonml[i][1]) outputs:
"one"
undefined
{ class: 'noind' }
"three"
I thought the if statements would execute in order?
The problem appears to be because you are modifying the array while you are still iterating over it. Which means that the [ 'para', '"two"' ] item is no longer there by the time you get round to it.
Usually, you should always avoid modifying a collection while you are looping over it. If you need to make changes then do them either before or afterwards (which can also be using a separate loop if you need to)