What would cause this JavaScript Regex titleize performance? - javascript

These toTitleCase functions roughly do the same thing.
But I'm curious to know why v1_toTitleCase would be faster than v5_toTitleCase?
And why v6_toTitleCase is faster than v3_toTitleCase? Since this is practically the same thing but v3_toTitleCase is specific to spaces.
I'd like to get the same performance as v2_toTitleCase but don't want to uppercase inputs like (hello => (Hello.
const input = "hello world! how are you today? Upper case Everything.";

/**
 * Title-cases a string by lower-casing it, splitting it on single spaces and
 * upper-casing the first character of every non-empty part.
 *
 * @param {string} str - Text to convert; falsy values are returned unchanged.
 * @returns {string} The converted text with the original spacing preserved.
 */
const v1_toTitleCase = (str) => {
  if (!str) {
    return str;
  }

  return str
    .toLowerCase()
    .split(' ')
    // Consecutive spaces produce empty parts; they pass through untouched so
    // that join(' ') restores the exact spacing of the input.
    .map((part) => (part.length === 0 ? part : part[0].toUpperCase() + part.slice(1)))
    .join(' ');
};
// Matches one non-whitespace character that sits at the start of the string or
// directly after a single space. Declared once here and reused by v4_toTitleCase.
const RegexFirstLetterOfWords = /(?:^| )(\S)/g;
/**
 * Upper-cases every lowercase ASCII letter (a-z) that directly follows a word
 * boundary (\b).
 *
 * @param {string} str - Text to convert.
 * @returns {string} The text with those letters upper-cased.
 */
const v2_toTitleCase = (str) => str.replace(/\b[a-z]/g, (match) => match.toUpperCase());
/**
 * Upper-cases the first non-whitespace character found at the start of the
 * string or directly after a single space.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The converted text.
 */
const v3_toTitleCase = (str) => {
  // The callback receives the whole match (optional leading space plus the
  // character); upper-casing the space is a no-op.
  const capitalize = (match) => match.toUpperCase();
  return str.replace(/(?:^| )(\S)/g, capitalize);
};
/**
 * Same replacement as v3_toTitleCase, but driven by the shared
 * RegexFirstLetterOfWords object instead of an inline regex literal.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The converted text.
 */
const v4_toTitleCase = (str) =>
  str.replace(RegexFirstLetterOfWords, (match) => match.toUpperCase());
/**
 * Upper-cases the first word character (\w) found at the start of the string
 * or directly after a single space.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The converted text.
 */
const v5_toTitleCase = (str) => {
  const capitalize = (match) => match.toUpperCase();
  return str.replace(/(?:^| )(\w)/g, capitalize);
};
// underscore.string implementation.
/**
 * Upper-cases every non-whitespace character that sits at the start of the
 * string or directly after a whitespace character or a hyphen.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The converted text.
 */
const v6_toTitleCase = (str) =>
  str.replace(/(?:^|\s|-)\S/g, (match) => match.toUpperCase());
Here's a performance runner.
Thoughts?
Is there a faster regex that I'm unaware of like \b but for splitting text by whitespace?

Related

Replace each element of the array in String.replace

I need to replace all characters of an incoming string with the corresponding ones from an object, how can I do this using an array?
/**
 * For every key of the mapping, runs a global, case-insensitive replace
 * against the ORIGINAL input and appends that pass's complete output to the
 * result. The returned string therefore contains one partially converted copy
 * of the input per key.
 *
 * @param {string} str - Text to process.
 * @returns {string} The concatenation of all per-key replacement passes.
 */
function handleCyrillic(str) {
  const obj = {
    й: "\\'e9",
    ц: "\\'f6",
  };
  // Maps each character of a match through obj; characters without a mapping
  // become undefined, which join("") renders as an empty string.
  const translate = (matched) =>
    matched
      .split("")
      .map((ch) => obj[ch])
      .join("");

  let res = "";
  for (const item of Object.keys(obj)) {
    res += str.replace(new RegExp(item, "gi"), translate);
  }
  return res;
}
console.log(handleCyrillic("цй йй!!"));
I get in the console ц\'e9 \'e9\'e9!!\'f6й йй!!
I expected \'f6\'e9 \'e9\'e9!!
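The problem is that every replace call runs against the original str and its full output is appended to res, so res ends up holding one partially converted copy of the input per key instead of a single fully converted string. Each replacement has to be applied to the result of the previous one. Here is an updated and slightly refactored version: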
/**
 * Replaces every occurrence (case-insensitive) of each mapped character with
 * its escape sequence, feeding the output of one replacement into the next.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The converted text.
 */
function handleCyrillic(str) {
  const obj = {
    й: "\\'e9",
    ц: "\\'f6",
  };
  return Object.entries(obj).reduce(
    (converted, [key, replacement]) => converted.replace(new RegExp(key, "gi"), replacement),
    str
  );
}
You just need to join the keys of obj and wrap them in square brackets, which forms a character class that matches any one of them (an "or" condition). Then, in the replace callback, return the replacement that obj holds for the matched character.
/**
 * Replaces every mapped character in a single pass using one character-class
 * regex built from the keys of the mapping.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The converted text; characters without a mapping are kept.
 */
function handleCyrillic(str) {
  const obj = {
    й: "\\'e9",
    ц: "\\'f6",
  };
  // Join with "" explicitly: the default separator of join() is ",", which
  // would add a literal comma to the character class and make commas match.
  const regex = new RegExp(`[${Object.keys(obj).join("")}]`, "gi");
  // The "i" flag also matches the uppercase forms, which are not keys of obj;
  // fall back to the lowercase key, and finally to the matched text itself,
  // so the output never contains the string "undefined".
  return str.replace(regex, (s) => obj[s] ?? obj[s.toLowerCase()] ?? s);
}
console.log(handleCyrillic("цй йй!!"));
In your code, every replace call operates on str, the function parameter, which never changes, and each result is concatenated onto res, the variable the function returns. That is why you are not getting the desired output.
/**
 * Applies one global, case-insensitive replacement per mapped character, each
 * one operating on the output of the previous replacement.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The converted text.
 */
function handleCyrillic(str) {
  const obj = {
    й: "\\'e9",
    ц: "\\'f6",
  };
  let res = str;
  for (const [item, replacement] of Object.entries(obj)) {
    res = res.replace(new RegExp(item, "gi"), replacement);
  }
  return res;
}
console.log(handleCyrillic("цй йй!!"));
Try this. At first split the string. If a character exists as a key in the obj, map() returns the obj value, if not it returns the current character. In the end join the string.
/**
 * Walks the string one UTF-16 code unit at a time and swaps every unit that
 * has a truthy mapping in obj for that mapping.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The converted text.
 */
function handleCyrillic(str) {
  const obj = {
    й: "\\'e9",
    ц: "\\'f6",
  };
  let converted = '';
  for (const char of str.split('')) {
    converted += obj[char] || char;
  }
  return converted;
}
console.log(handleCyrillic("цй йй!!"));
There is no need for a regular expression here. You can use Array.from with its callback:
const obj = {
  й: "\\'e9",
  ц: "\\'f6",
};

/**
 * Converts the string code point by code point, replacing each character that
 * has a non-nullish entry in obj with that entry.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The converted text.
 */
const handleCyrillic = (str) => {
  let converted = "";
  for (const ch of str) {
    converted += obj[ch] ?? ch;
  }
  return converted;
};
console.log(handleCyrillic("цй йй!!"));
The ?? operator deals with cases where there is no mapping for a character. This still allows a mapping to the empty string to be included in obj, which means you want to remove a character.
There's no need for this conversion table in the first place. Basic Unicode Cyrillic is continuous in range 410...44f and Win-1251 is c0..ff, so you can just do some codepoint arithmetics to convert between both:
/**
 * Converts Cyrillic letters to \'xx escapes holding their Windows-1251 byte
 * value, using code point arithmetic instead of a lookup table.
 *
 * U+0410..U+044F map linearly onto 0xC0..0xFF. The letters Ё (U+0401) and
 * ё (U+0451) lie outside that range and are mapped to their Windows-1251
 * bytes 0xA8 and 0xB8 explicitly. Every other character is kept as is.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The converted text.
 */
function handleCyrillic(str) {
  let res = '';
  for (const chr of str) {
    const cp = chr.codePointAt(0);
    let byte;
    if (0x410 <= cp && cp <= 0x44f) {
      byte = cp - 0x410 + 0xc0;
    } else if (cp === 0x401) {
      byte = 0xa8;
    } else if (cp === 0x451) {
      byte = 0xb8;
    }
    res += byte === undefined ? chr : `\\'${byte.toString(16)}`;
  }
  return res;
}
console.log(handleCyrillic("цй йй!!"));

How to make every other word in a string uppercase

I don't understand why when I return element.toUpperCase()
it doesn't return it back to the array as uppercase, however if I console.log(element.toUpperCase()) before that return statement it displays as upper case
// Question snippet: tries to upper-case every second word of a sentence, but
// does so with filter(), so the transformed values never reach the result.
const sentence = "fur pillows are hard to actually sleep on";
const uppercaseOddWords = (string) => {
// Reuses the parameter to hold the array of space-separated words.
string = string.split(" ");
// NOTE: oddWords is assigned here without any declaration.
// filter() only inspects whether the callback's return value is truthy in
// order to keep or drop the ORIGINAL element; the returned strings themselves
// (upper-cased or not) are never stored in the new array.
oddWords = string.filter((element, index) => {
if (index % 2 === 0) {
return element;
}
return element.toUpperCase();
});
console.log(oddWords.join(" "));
};
uppercaseOddWords(sentence);
The filter callback is expected to return a truthy or falsy value. In your case every returned value is truthy, so nothing is filtered out and the original elements are kept unchanged. What you are looking for is the map method.
const sentence = "fur pillows are hard to actually sleep on";

/**
 * Logs the sentence with every word at an odd zero-based index (the 2nd, 4th,
 * ... word) upper-cased.
 *
 * @param {string} string - Sentence whose words are separated by single spaces.
 * @returns {void}
 */
const uppercaseOddWords = (string) => {
  // Declared locally: assigning to an undeclared name creates an implicit
  // global and throws a ReferenceError in strict mode and ES modules.
  const words = string.split(" ");
  const oddWords = words.map((element, index) => {
    if (index % 2 === 0) {
      return element;
    }
    return element.toUpperCase();
  });
  console.log(oddWords.join(" "));
};
uppercaseOddWords(sentence);
.filter() should only be used if you want to pick elements of the array and leave out something based on a boolean logic test. The function expects a logical condition, and can either return true or false. You are using the function wrong.
.map() on the other hand is used to transform the elements in the array from their original state to any other state based on the logic in the function.
const sentence = "fur pillows are hard to actually sleep on";

/**
 * Logs the sentence with the words at odd zero-based positions upper-cased
 * and the remaining words left untouched.
 *
 * @param {string} string - Sentence whose words are separated by single spaces.
 * @returns {void}
 */
const uppercaseOddWords = (string) => {
  const transformed = string
    .split(" ")
    .map((word, position) => (position % 2 !== 0 ? word.toUpperCase() : word));
  console.log(transformed.join(" "));
};
uppercaseOddWords(sentence);
Still wondering why returning it doesn't work, but I got the function to work this way;
const sentence = "fur pillows are hard to actually sleep on";

/**
 * Logs the sentence with every word at an odd zero-based index upper-cased,
 * building the output by accumulating the words into one string.
 *
 * @param {string} string - Sentence whose words are separated by single spaces.
 * @returns {void}
 */
const uppercaseOddWords = (string) => {
  let newString = "";
  // A plain loop replaces filter(): the array filter() returned was discarded
  // and its callback existed only for the side effect of appending.
  for (const [index, element] of string.split(" ").entries()) {
    newString += `${index % 2 === 0 ? element : element.toUpperCase()} `;
  }
  console.log(newString.trim()); // remove the trailing space
};
uppercaseOddWords(sentence);
You can achieve this with the Array.map() method, which returns the same number of elements as it receives. Based on a condition on the index, we can convert every other element to upper case.
Live Demo :
const sentence = "fur pillows are hard to actually sleep on";

/**
 * Logs the sentence with every word at an odd zero-based index (the 2nd, 4th,
 * ... word) upper-cased, i.e. words at even indexes are left untouched.
 *
 * @param {string} string - Sentence whose words are separated by single spaces.
 * @returns {void}
 */
const uppercaseOddWords = (string) => {
  const words = string.split(" ");
  // Even indexes stay as they are; only odd indexes are upper-cased.
  const res = words.map((element, index) => (index % 2 === 0 ? element : element.toUpperCase()));
  console.log(res.join(" "));
};
uppercaseOddWords(sentence);

wrap a function in javascript

let str = "i am writing an algorithm.";
// function to count alphabets (counts every character: letters, spaces and punctuation)
const alphabet_count = (str) => str.length;
// function to count words (the pieces obtained by splitting on single spaces)
const word_count = (str) => str.split(" ").length;
// function to count vowels, case-insensitively; match() returns null when the
// string has no vowel, so fall back to an empty array instead of throwing
const vowel_count = (str) => (str.match(/[aeiou]/gi) ?? []).length;
// Here I am trying to wrap all three functions in one.
// NOTE: the line below is the asker's attempt and is not valid JavaScript:
// a declared name cannot be followed by "()", and the braces do not form a
// valid object literal.
const sentence_read() = {alphabet_count(), word_count(), vowel_count()};
I am trying to wrap all three functions in one.
// Runs the three counters on the same string and returns their results as
// [character count, word count, vowel count].
const sentence_read = (str) => {
  const characters = alphabet_count(str);
  const words = word_count(str);
  const vowels = vowel_count(str);
  return [characters, words, vowels];
};
will return an array with your 3 results.
Usage :
// Usage example: logs the array returned by sentence_read for a short input.
let str = "a word";
console.log(sentence_read(str)) // output : [6, 2, 2]
Using a template string
let str = "i am writing an algorithm.";
// function to count alphabets (counts every character: letters, spaces and punctuation)
const alphabet_count = (str) => str.length;
// function to count words (the pieces obtained by splitting on single spaces)
const word_count = (str) => str.split(" ").length;
// function to count vowels, case-insensitively; match() returns null when the
// string has no vowel, so fall back to an empty array instead of throwing
const vowel_count = (str) => (str.match(/[aeiou]/gi) ?? []).length;
// Formats the three counts into one human-readable line.
const sentence_read = (str) =>
  `a_c : ${alphabet_count(str)}, w_c : ${word_count(str)}, v_c : ${vowel_count(str)}`;
console.log(sentence_read(str)); // a_c : 26, w_c : 5, v_c : 8
If you want to group the functions in an object, you can use:
const str = "i am writing an algorithm.";

// Counting strategies keyed by unit name.
const counter = {
  // Every character of the string, including spaces and punctuation.
  alphabet: (s) => s.length,
  // Pieces obtained by splitting on single spaces.
  word: (s) => s.split(" ").length,
  // match() returns null when nothing matches, hence the empty-array fallback.
  vowel: (s) => (s.match(/[aeiou]/gi) ?? []).length,
};

/**
 * Counts the requested unit in a string.
 *
 * @param {string} unit - One of the keys of `counter`.
 * @param {string} str - Text to inspect.
 * @returns {number} The count for that unit.
 * @throws {Error} When `unit` is not one of counter's own keys.
 */
const count = (unit, str) => {
  // An own-property check keeps inherited names such as "toString" or
  // "constructor" from being mistaken for counting functions.
  if (!Object.prototype.hasOwnProperty.call(counter, unit)) {
    throw new Error('Unit does not exist');
  }
  return counter[unit](str);
};
console.log(count('alphabet', str)); // 26
console.log(count('word', str)); // 5
console.log(count('vowel', str)); // 8

How to replace string only if the replacement exists in the target object?

I have the following snippet from this post, this code replaces the token $ENV[accountsDomain] in a string with the key from item object accountsDomain: 'www.yahoo.com' and similarly for accountsUrl
how can I run replace conditionally, replace the string only if item[g1] exists, as these keys are optional
// Values available for $ENV[...] tokens.
const item = {
  accountsUrl: 'www.google.com',
  accountsDomain: 'www.yahoo.com',
};
// Captures the key name inside the first $ENV[key] token.
const regex = /\$ENV\[(\w+)]/;
let s = "foo$ENV[accountsDomain]bar";
// Substitute the token with the item's value; when the item has no truthy
// value for the captured key, the token itself is put back unchanged.
s = s.replace(regex, (token, key) => {
  const replacement = item[key];
  return replacement ? replacement : token;
});
console.log(s);
You cannot do that in replace, you have to do it outside.
const item = {
  accountsUrl: 'www.google.com',
  accountsDomain: 'www.yahoo.com',
};

/**
 * Builds the string "foo$ENV[<key>]bar" and replaces the token with the value
 * the item holds for that key.
 *
 * @param {Object} item - Object providing the replacement values.
 * @param {string} key - Name placed inside the $ENV[...] token.
 * @returns {string|undefined} The string with the token replaced, or
 *   undefined when the token cannot be matched or the item has no own,
 *   non-nullish value for the key.
 */
function replaceItem(item, key) {
  const regex = /\$ENV\[(\w+)]/;
  const s = `foo$ENV[${key}]bar`;
  // match() returns null when key contains characters outside \w, so it must
  // be checked before the capture group is read.
  const found = s.match(regex);
  if (found === null) {
    return undefined;
  }
  const name = found[1];
  // Only own properties count; inherited names such as "constructor" do not.
  const value = Object.prototype.hasOwnProperty.call(item, name) ? item[name] : undefined;
  if (value == null) {
    return undefined;
  }
  // A replacer function inserts the value verbatim (no "$" pattern expansion).
  return s.replace(regex, () => value);
}
console.log('result with existing key:', replaceItem(item, 'accountsDomain'));
console.log('result with un-existing key:', replaceItem(item, 'accDomain'));

Regex from loops

// Builds a global, case-insensitive regex that alternates between the
// fullname of every person.
// NOTE(review): reads `persons` from the enclosing scope; the `input`
// parameter is not used.
this.personRegex = (input) => {
  const names = persons.map((person) => person.fullname);
  return new RegExp(names.join("|"), "gi");
};
The above gives me:
/lwe|alvin/gi;
How do I get the below:
/\b(?:lwe|alvin)\b/gi;
/**
 * Builds a global, case-insensitive regex that matches any of the given
 * persons' full names as whole words: /\b(?:name1|name2|...)\b/gi.
 *
 * @param {Array<{fullname: string}>} persons - People whose names to match.
 * @returns {RegExp} The combined regex.
 */
const personRegex = (persons) => {
  const alternatives = persons
    // Escape regex metacharacters so that a name such as "J. Doe" is matched
    // literally instead of being interpreted as a pattern.
    .map((person) => person.fullname.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
    .join("|");
  return new RegExp(`\\b(?:${alternatives})\\b`, "gi");
};
console.log(personRegex([{
  fullname: "lwe"
}, {
  fullname: "alvin"
}]));
You can simply do string concatenation:
// Pre-computed alternation of every person's fullname ("a|b|c").
var personAlternatives = persons.map((person) => person.fullname).join("|");
// Wraps the alternation in word boundaries; the `input` parameter is not used.
this.personRegex = (input) => new RegExp(`\\b(?:${personAlternatives})\\b`, "gi");
This should give you the least overhead:
/**
 * Builds a global, case-insensitive regex matching any item's `attr` value as
 * a whole word: /\b(?:a|b|...)\b/gi. The values are inserted into the pattern
 * as they are, without escaping.
 *
 * @param {Array<{attr: string}>} array - Items providing the alternatives.
 * @returns {RegExp} The combined regex.
 */
function buildRegex(array) {
  const alternatives = array.map((item) => item.attr).join("|");
  return new RegExp(`\\b(?:${alternatives})\\b`, "gi");
}
console.log(buildRegex([{attr:"a"},{attr:"b"}]));
You can use template strings, with String.raw to avoid worrying about double backslashes:
/**
 * Builds a global, case-insensitive regex that matches any of the given full
 * names as whole words. String.raw keeps the backslashes of \b literal, so
 * they do not have to be doubled. The names are inserted without escaping.
 *
 * @param {Array<{fullname: string}>} input - People whose names to match.
 * @returns {RegExp} The combined regex.
 */
// Declared with const: a bare assignment would create an implicit global and
// throws a ReferenceError in strict mode and ES modules.
const personRegex = (input) =>
  new RegExp(
    String.raw`\b(?:${input.map((person) => person.fullname).join("|")})\b`,
    "gi"
  );
console.log(personRegex([{fullname: 'Bob'}, {fullname: 'Sally'}]));
In the real world, you would want to escape any possible special regexp characters in the input. For more information on doing that, see this question.

Categories