NodeJS match with regex gets last word than after space

NodeJS match with regex gets last word than after space - javascript

var prefix = '.'
var str = '.kick blah 1 2 3'.match(`${prefix}kick (.*) (.*)`);
console.log(str)
result:
[ 'blah 1 2', '3', index: 0, input: '.kick blah 1 2 3', ]
I wanted result to be
[ 'blah', '1 2 3', index: 0, input: '.kick blah 1 2 3', ]

You need to make the first group non-greedy; otherwise the first ".*" keeps matching as much as it can (spaces included), leaving only the last word for the second group.
Change the regex to ${prefix}kick (.*?) (.*)

Related

How to fix invalid JSON with RegExp in Javascript?

This is what I've tried
// input
let input = "{id: 1, name: apple, qty: 2, colors: [{id: 1, hex: #f95}], store: {id: 1, name: Apple Store}}"
let result = input.replace((/([\w]+)(:)/g), "\"$1\"$2");
// {"id": 1, "name": apple, "qty": 2, "colors": [{"id": 1, "hex": #f95}], "store": {"id": 1, "name": Apple Store}}
After that I just patch the values with something like replaceAll(': ', ': "'). I don't think this is a good way to solve it; maybe someone can help me with this problem. Thank you so much.

You can convert the stated string that looks almost like an object into an actual JavaScript object with the following assumptions:
keys are composed of alphanumeric and underscores chars
values are treated as numbers if they have the format of a number, e.g. an optional minus sign, followed by digits with optional .
values are treated as a string unless it has the form of a number, or start with [ (array) or { (object)
string values may not contain , or }
const input = "{id: 1, name: apple, qty: 2, colors: [{id: 1, hex: #f95}], store: {id: 1, name: Apple Store}}";
const regex1 = /([,\{] *)(\w+):/g;
const regex2 = /([,\{] *"\w+":)(?! *-?[0-9\.]+[,\}])(?! *[\{\[])( *)([^,\}]*)/g;
let json = input
.replace(regex1, '$1"$2":')
.replace(regex2, '$1$2"$3"')
let result = JSON.parse(json);
console.log(JSON.stringify(result, null, ' '));
Output:
{
"id": 1,
"name": "apple",
"qty": 2,
"colors": [
{
"id": 1,
"hex": "#f95"
}
],
"store": {
"id": 1,
"name": "Apple Store"
}
}
Explanation of regex1:
([,\{] *) -- capture group 1: , or {, followed by optional spaces
(\w+) -- capture group 2: 1+ word chars (alphanumeric and underscore)
: -- literal :
replace '$1"$2":' -- capture group 1, followed by capture group 2 enclosed in quotes, followed by colon
Explanation of regex2:
([,\{] *"\w+":) -- capture group 1: , or {, followed by optional spaces, quote, 1+ word chars, quote, colon
(?! *-?[0-9\.]+[,\}]) -- negative lookahead for optional spaces, a number, followed by , or }
(?! *[\{\[]) -- negative lookahead for optional spaces, followed by { or [
( *) -- capture group 2: optional spaces
([^,\}]*) -- capture group 3: everything that is not a , or }
replace '$1$2"$3"' -- capture group 1, followed by capture group 2, followed by capture group 3 enclosed in quotes
Learn more about regex: https://twiki.org/cgi-bin/view/Codev/TWikiPresentation2018x10x14Regex

Thanks for all the answers. I tried it this way and it works:
/**
 * Repairs "almost JSON" text whose keys and values are written without
 * quotes (for example `{id: 1, name: Apple Store}`) and parses it.
 */
class FixJson {
  /**
   * Quote every bare token, parse the result as JSON, then convert
   * string values that look like booleans or numbers to real ones.
   *
   * @param {string} json - Loosely written object/array literal text.
   * @returns {*} The parsed value with boolean and numeric strings converted.
   * @throws {SyntaxError} If the text is still not valid JSON after quoting.
   */
  run(json) {
    // Recursively convert 'true'/'false' and numeric strings in place.
    const coerceValues = (node) => {
      // Primitives (and null) have nothing to walk.
      if (node === null || typeof node !== 'object') {
        return node;
      }
      for (const key of Object.keys(node)) {
        const value = node[key];
        if (typeof value === 'object') {
          coerceValues(value);
        } else if (value === 'true' || value === 'false') {
          node[key] = value === 'true';
        } else if (
          typeof value === 'string' &&
          value.trim() !== '' && // Number('') is 0, so blank strings must stay strings
          !Number.isNaN(Number(value))
        ) {
          node[key] = Number(value);
        }
      }
      return node;
    };

    // Wrap every run of token characters (keys and values alike) in double quotes.
    // '@' is part of the token set so values such as e-mail addresses stay in one piece.
    const quoted = json.replace(/([a-zA-Z0-9!@#\$%\^\&*\)\(+=._-]+)/g, '"$1"');
    // Words separated by a single space were quoted individually ("Apple" "Store");
    // collapsing the `" "` seam joins them back into one string value.
    const parsed = JSON.parse(quoted.replaceAll('" "', ' '));
    return coerceValues(parsed);
  }
}
const fix = new FixJson()
let result = fix.run("<your_invalid_json>")

Why is my program only detecting integer tokens in NodeJS?

I decided to try and make a language tokenizer (don't even know if that's a real word) and made around 4 tokens that successfully tokenized a full program with line breaks and multiple spaces etc, but I just started from scratch and am running into a problem; I have two tokens currently, int and variableSet. The program being read has the content of 1 sv 1 2 as just a test, and the tokenizer returns an array of int, int, int, int with sv having a value of 1.
const code = `1 sv 1 2`
var validTokens = require("./tokens"); // just an object with the structure tokenName: RegExp object
/**
 * Find the token name whose entry in the outer `validTokens` object is the
 * given RegExp. The comparison is by identity (===), not by pattern text.
 *
 * @param {RegExp} regex - The exact object stored in `validTokens`.
 * @returns {string|false} The matching key, or false when no value is identical.
 */
function reverseTokenSearch(regex){
// Object.entries is applied twice, so each item is [indexString, [key, value]];
// the `index` part is destructured but not used.
for (const [index, [key, value]] of Object.entries(Object.entries(validTokens))) {
if (value === regex){
return key;
}
}
return false;
}
/**
 * Try each token regex in `validTokens` against `code`, starting the search
 * at `lastidx`, and return information about the first one that matches.
 *
 * @param {*} code - Program text; converted with toString() before matching.
 * @param {number} [lastidx=0] - Value written to each regex's lastIndex before exec.
 * @returns {Array|undefined} [regex, matched text, match index,
 *   lastidx + match length + 1, named groups], or undefined (implicit) when
 *   no token regex matches.
 */
function throughTokens (code,lastidx=0) {
for (const tokentype in validTokens){ // loop through all of the valid tokens
validTokens[tokentype].lastIndex = lastidx;
const searchresult = validTokens[tokentype]
const tokenresult = searchresult.exec(code.toString());
// NOTE(review): any match is accepted, even one that begins after `lastidx`;
// the first token type to match anywhere wins.
if (tokenresult) {
// The 4th element is derived from `lastidx`, not from tokenresult.index, and the
// +1 presumably skips a separator after the token - confirm for the last token.
return [searchresult, tokenresult[0], tokenresult.index, lastidx+tokenresult[0].length+1, tokenresult.groups]
}
}
}
/**
 * Set a `lastidx` property to 0 on every value in the outer `validTokens` object.
 */
function resetIndexes (){
for (const tt in validTokens){
// NOTE(review): this writes a custom `lastidx` property, not the built-in
// RegExp `lastIndex` that throughTokens assigns - confirm which was intended.
validTokens[tt].lastidx = 0;
}
}
resetIndexes();
var lst = 0
var tokens = []
var res = 1;
console.log("\ntokenizer; original input:\n"+code+"\n");
while (lst !== undefined && lst !== null){
if (lst > code.length){
console.error("Fatal error: tokenizer over-reached program length.")
process.exit(1)
}
const res = throughTokens(code,lst);
if(res){
console.log(res,lst)
const current = []
current[0] = reverseTokenSearch(res[0])
current[1] = res[1]
const currentidx = 2
for (const x in res[4]) {
current[currentidx] = x;
}
tokens.push(current)
lst = res[3]
} else {
lst = null
}
}
console.log(tokens)
// What outputs:
/*
tokenizer; original input:
1 sv 1 2
[ /\d+/g { lastidx: 0 }, '1', 0, 2, undefined ] 0
[ /\d+/g { lastidx: 0 }, '1', 5, 4, undefined ] 2
[ /\d+/g { lastidx: 0 }, '1', 5, 6, undefined ] 4
[ /\d+/g { lastidx: 0 }, '2', 7, 8, undefined ] 6
[ [ 'int', '1' ], [ 'int', '1' ], [ 'int', '1' ], [ 'int', '2' ] ]
*/
I think it's because of the order of the array but I have no idea where to start fixing it and would greatly appreciate a push in the right direction.
(edit): I tried removing the "g" flag on the RegExp object, and all it did was send the program into an infinite loop.

The problem is that you are silently assuming that every match found by the regex will start at lastidx which is not always the case. If you log tokenresult and lastidx before returning from throughTokens, you will see:
0
[ '1', index: 0, input: '1 sv 1 2', groups: undefined ]
2
[ '1', index: 5, input: '1 sv 1 2', groups: undefined ]
4
[ '1', index: 5, input: '1 sv 1 2', groups: undefined ]
6
[ '2', index: 7, input: '1 sv 1 2', groups: undefined ]
In the second iteration, the match is at index 5, but you assume it to be at index 2, which it is not (whereby you also incorrectly increment lastidx to 4). You also at the end of throughTokens assume that every match is followed by a space, which is also incorrect for the last token.
Simplest way to fix this code is to replace
//if (tokenresult) { // replace in throughTokens with below
if (tokenresult && tokenresult.index === lastidx) {
to be sure that you're matching at the right place and then in the main loop
//while (lst !== undefined && lst !== null){ // replace with below
while (lst !== undefined && lst !== null && lst < code.length){
to handle the end of the input correctly.
With these changes, the printouts that we added earlier will be
0
[ '1', index: 0, input: '1 sv 1 2', groups: undefined ]
2
[ 'sv', index: 2, input: '1 sv 1 2', groups: undefined ]
5
[ '1', index: 5, input: '1 sv 1 2', groups: undefined ]
7
[ '2', index: 7, input: '1 sv 1 2', groups: undefined ]
which is correct and the output would be
[
[ 'int', '1' ],
[ 'variableSet', 'sv' ],
[ 'int', '1' ],
[ 'int', '2' ]
]
Recommendations
There are a lot of other logical and programmatical problems with this code which I will not go into but my advice is to go through every piece of the code and understand what it does and whether it could be done in a simpler way.
On a general level instead of returning an array with data [d1, d2, d3, ...] return an object with named properties { result: d1, index: d2, ... }. Then it is much easier for someone else to understand your code. Also go through naming of methods.
As far as this approach is concerned, if you know that there will be a space after each token, then extract only the current token and send to throughToken. Then you can make that function both more efficient and robust against errors.

Summarize the text and stick all the letters together

I have a text and want to summarize it. I want to change this array:
Array 1
[
'CALX', '11.10', '21',
'01', '08', 'EGLD',
'USDT', 'LDFDFC', 'ZONE',
'238.5', '233', 'LEVERAGE',
'3', 'X', 'TARGET',
'1', '243.9', 'TARGET',
'2', '248', 'TARGET',
'3', '254', 'TARGET',
'4', '260', 'H',
'GD', 'S', 'AFCA'
]
to this :
Array 2
[
'CALX', '11.10', '21',
'01', '08', 'EGLDUSDTLDFDFCZONE',
'238.5', '233', 'LEVERAGE',
'3', 'XTARGET',
'1', '243.9', 'TARGET',
'2', '248', 'TARGET',
'3', '254', 'TARGET',
'4', '260', 'HGDSAFCA',
]
As you can see, I want all the letters to stick together until they reach a number, and each number should be its own element of the array.
This is the code that can be used to convert text to an Array1
// Sample message to break into tokens.
const input = 'CALX, [11.10.21 01:08] $EGLD/USDT #Ldfdfc zone : 238.5 - 233 "LEVERAGE" : 3x TARGET1 : 243.9 TARGET 2 : 248 TARGET 3 : 254 TARGET 4 : 260 h.gd.s afca. `~!##$%^&*()_-+=-/?><'
// Upper-case the message, then collect each run of letters and each number
// (with an optional decimal part) as a separate array element.
// The source must be `input`: reading `text` inside its own initializer throws.
const text = input.toUpperCase().match(/[a-z]+|\d+(?:\.\d+)?/gi);
So how can I change Array 1 into Array 2?
Sorry for my English, and thank you for your help.

Based on the initial string, to get the desired array as output you don't have to convert it to an array to process it again.
You can use a pattern similar to the one you tried, with an alternation |, but instead of matching [a-z]+ you capture 1 or more non-digits using (\D+) in a group.
Then, in the callback of replace, you can remove the unwanted characters if there is a match for group 1. The unwanted characters are matched by [\W_]+, i.e. one or more non-word characters or underscores.
If there is no group, you can return the match (the digits) between delimiters, where you can split on the delimiters afterwards to create the final array.
// Sample message to break into tokens.
const input = 'CALX, [11.10.21 01:08] $EGLD/USDT #Ldfdfc zone : 238.5 - 233 "LEVERAGE" : 3x TARGET1 : 243.9 TARGET 2 : 248 TARGET 3 : 254 TARGET 4 : 260 h.gd.s afca. `~!##$%^&*()_-+=-/?><'
// Declared with const: an undeclared assignment throws in strict mode / ES modules.
const text = input
  .toUpperCase()
  .replace(/\d+(?:\.\d+)?|(\D+)/g,
    // Group 1 is a run of non-digits: keep only its letters.
    // Otherwise the match is a number: fence it with '#' so it can be split out below.
    (m, g1) => g1 ? g1.replace(/[\W_]+/g, '') : `#${m}#`
  );
// Adjacent numbers leave '##' between them, hence the '#+' separator.
console.log(text.split(/#+/));

One possible solution could look like this:
let arr = [
'CALX', '11.10', '21',
'01', '08', 'EGLD',
'USDT', 'LDFDFC', 'ZONE',
'238.5', '233', 'LEVERAGE',
'3', 'X', 'TARGET',
'1', '243.9', 'TARGET',
'2', '248', 'TARGET',
'3', '254', 'TARGET',
'4', '260', 'H',
'GD', 'S', 'AFCA'
]
/**
 * Merge consecutive letter-only elements into one string while keeping
 * every element that contains a digit as its own entry.
 *
 * @param {string[]} a - Tokens such as ['EGLD', 'USDT', '238.5'].
 * @returns {string[]} New array with neighbouring letter tokens concatenated.
 */
function handleArray(a) {
  const result = [];
  let pendingText = '';
  for (const el of a) {
    if (/\d/.test(el)) {
      // A number ends the current run of letters: emit the run first, then the number.
      if (pendingText) {
        result.push(pendingText);
        pendingText = '';
      }
      result.push(el);
    } else {
      // Keep gluing letters together; nothing is emitted yet.
      pendingText += el;
    }
  }
  // Letters that follow the last number would otherwise be lost.
  if (pendingText) {
    result.push(pendingText);
  }
  return result;
}
console.log(handleArray(arr))

How to do two array operations parallelly in Javascript

I have two arrays of objects having structures like this:
let groups = [
{
word: 'ok',
id: 1,
},
{
word: 'hi',
id: 2,
}
]
let words = [
{
name: 'hello',
id: 1,
meaning: 'Grreeting !',
example: 'Hello how are you ?',
groupId: 1
},
{
name: 'morning',
id: 3,
meaning: 'First sign of the day !',
example: 'Good Morning ?',
groupId: 2
}
]
A group has many words. I have an empty array results[] and I will store all matching words there.
Now I want to search a keyword hi in both, groups and words array. The current approach I am following is :
First, I will map through the groups array, and if the keyword hi matches any group name,
then I will push all the words having that groupId into my results array.
Now I will map through the words array and if keyword hi matches with any word name then I will push that into results array.
Now, I want to do both these operations parallelly. Finding all the matches into words array and groups array parallelly and pushing data to results array.
Is there any way I can do this?

This can be done by first combining both arrays and then using the filter() array prototype function.
let groups = [
{
word: 'ok',
id: 1,
},
{
word: 'hi',
id: 2,
}
]
let words = [
{
name: 'hello',
id: 1,
meaning: 'Grreeting !',
example: 'Hello how are you ?',
groupId: 1
},
{
name: 'morning',
id: 3,
meaning: 'First sign of the day !',
example: 'Good Morning ?',
groupId: 2
}
];
let search = 'hi'; // get the search term from somewhere
let result = [ ...groups, ...words ] // combine arrays into one
.filter(({ word, name }) => ( // 'extract' word/name properties
word === search || name === search // return true when word or name matches searchterm
));
A little side note: in this solution the search term can't be undefined, because every object that lacks a word or a name property would then compare undefined === undefined and always pass the filter.

Although you could technically do this with a single loop, doing so would be at the cost of clarity and simplicity. Basically you'd do a loop with the index going from 0 to Math.max(groups.length, words.length) and then do your check on each array:
// Walk both arrays with one shared index; the loop runs as long as the longer
// array, so each body is guarded by that array's own length.
for (let n = 0, len = Math.max(groups.length, words.length); n < len; ++n) {
if (n < groups.length) {
// Check `groups[n]`
}
if (n < words.length) {
// Check `words[n]`
}
}
That way, you have a single loop, which is about as close to "in parallel" as you're going to get without using a web worker (which would almost certainly be overkill). But again, it doesn't buy you much, and costs you clarity and simplicity.

Using async functions for the filter operations.
Not sure what you are doing that requires this, but note that they are not running multithreaded, and it's unlikely that this will be helpful or useful in solving the actual problem. Also, the order these execute in will be deterministic for your example.
/**
 * Resolve with the entries of `list` whose `key` property strictly equals `match`.
 * Being async, it returns a promise even though the filtering itself is synchronous.
 */
const matches = async (list, key, match) => {
  const isHit = (entry) => entry[key] === match;
  return list.filter(isHit);
};
let groups = [{
word: 'ok',
id: 1,
},
{
word: 'hi',
id: 2,
}
]
let words = [{
name: 'hello',
id: 1,
meaning: 'Grreeting !',
example: 'Hello how are you ?',
groupId: 1
},
{
name: 'morning',
id: 3,
meaning: 'First sign of the day !',
example: 'Good Morning ?',
groupId: 2
}
]
let matched = [];
Promise.all([
matches(groups, 'word', 'hi').then(m => {
matched = matched.concat(m);
console.log('groups matched:', matched);
}),
matches(words, 'name', 'hello').then(m => {
matched = matched.concat(m);
console.log('words matched:', matched);
})
]).then(res => {
console.log("matched:", matched);
console.log(".all results:", [...res[0], ...res[1]]);
})
console.log("matched immediate:", matched);
setTimeout(() => console.log("matched event loop delayed:", matched), 0);

How to trim the last one or last two characters of a string

I have an object with a bunch of strings:
[
{
date: "03/12/2014",
name: "mr blue",
title: "math teacher -"
},
{
date: "04/02/2015",
name: "mrs yellow",
title: "chemistry teacher"
},
{
date: "11/04/2014",
name: "mrs green",
title: "chemistry teacher - "
},
]
How can I strip the - from the title field if that string contains a -?
I know I can perform a slice/substring:
var myvalue = myobject.title.substring(0, myobject.title.length-1);
However this will apply for all cases, and not just the ones that contain the -

Use replace:
var myvalue = myobject.title.replace(/\s*-\s*$/,'');
Bonus: with this regular expression only a dash at the end will be removed (along with the spaces around).

var title = 'math teacher -';
title = title.replace('-', '').trim();
document.write(title);
Update
Above will fail if title has dashes in the middle. Therefore, using lastIndexOf you can do
// Cut the title at its last dash, if there is one, then trim leftover whitespace.
// lastIndexOf returns -1 when no dash exists; substring(0, -1) would yield '',
// so the title is kept whole in that case.
const lastDash = title.lastIndexOf("-");
title = (lastDash === -1 ? title : title.substring(0, lastDash)).trim();

We Keep Coding

JavaScript is the programming language of the Web.

NodeJS match with regex gets last word than after space - javascript

var prefix = '.' var str = '.kick blah 1 2 3'.match(`${prefix}kick (.*) (.*)`); console.log(str) result: [ 'blah 1 2', '3', index: 0, input: '.kick blah 1 2 3', ] I wanted result to be [ 'blah', '1 2 3', index: 0, input: '.kick blah 1 2 3', ]

You need to make the first group non-greedy, or else "." will keep matching.. Change the regex to ${prefix}kick (.*?) (.*)

Related

How to fix invalid JSON with RegExp in Javascript?

Why is my program only detecting integer tokens in NodeJS?

Summarize the text and stick all the letters together

How to do two array operations parallelly in Javascript

How to trim the last one or last two characters of a string

Categories

Resources

We Keep Coding

JavaScript is the programming language of the Web.

NodeJS match with regex gets last word than after space - javascript

var prefix = '.' var str = '.kick blah 1 2 3'.match(`${prefix}kick (.*) (.*)`); console.log(str) result: [ 'blah 1 2', '3', index: 0, input: '.kick blah 1 2 3', ] I wanted result to be [ 'blah', '1 2 3', index: 0, input: '.kick blah 1 2 3', ]

You need to make the first group non-greedy, or else "." will keep matching.. Change the regex to ${prefix}kick (.*?) (.*)

Related

How to fix invalid JSON with RegExp in Javascript?

Why is my program only detecting integer tokens in NodeJS?

Summarize the text and stick all the letters together

How to do two array operations parallelly in Javascript

How to trim the last one or last two characters of a string

Categories

Resources

var prefix = '.' var str = '.kick blah 1 2 3'.match(`${prefix}kick (.*) (.*)`); console.log(str) result: [ 'blah 1 2', '3', index: 0, input: '.kick blah 1 2 3', ] I wanted result to be [ 'blah', '1 2 3', index: 0, input: '.kick blah 1 2 3', ]

You need to make the first group non-greedy, or else "." will keep matching.. Change the regex to ${prefix}kick (.*?) (.*)