Javascript replace utf characters in string

Javascript replace utf characters in string - javascript

I want to after type Title of post automatically take value and create slug. My code works fine with English Latin characters but problem is when I type characters 'čćšđž'. Code replace first type of this characters in string but if character is repeated than is a problem. So, for testing purpose this title 'šžđčćžđš čćšđžčćšđž čćšđžčć ćčšđžšžčćšđ ćčšžčć' is converted to this slug 'szdcc'.
This is my jquery code:
$('input[name=title]').on('blur', function() {
var slugElement = $('input[name=slug]');
if(slugElement.val()) {
return;
}
slugElement.val(this.value.toLowerCase().replace('ž', 'z').replace('č','c').replace('š', 's').replace('ć', 'c').replace('đ', 'd').replace(/[^a-z0-9-]+/g, '-').replace(/^-+|-+$/g, ''));
});
How to solve this problems? Also is it possible to this few characters put in same replace() function?

Try this:
function clearText(inp) {
var wrong = 'čćšđž';
var right = 'ccsdz';
var re = new RegExp('[' + wrong + ']', 'ig');
return inp.replace(re, function (m) { return right.charAt(wrong.indexOf(m)); });
}

replace() only replaces the first occurrence unless regex is used with global modifier. You will need to change them all to regular expression.
replace(/ž/g, "z")
As far as I know, it will not be possible to use a single replace() in your case.
If you are concerned with chaining a bunch of .replace() together, you might be better off writing some custom code to replace these characters.
var newStr = "";
for (var i = 0; i < str.length; i++) {
var c = str.charAt(i);
switch (c) {
case "ž": newStr += "z"; break;
default: newStr += c; break;
}
}

Related

Correctly writing a javascript regex to split a stirng

This is an example of the input:
var input="0-test-different-0_0:11,0-test-different-0_1:54,0-test-different-1_0:19,0-test-different-1_1:8,0-test-same-2_0:20,0-test-same-2_1:20,0-test-different-3_0:19,0-test-different-3_1:16,0-test-different-4_0:18,0-test-different-4_1:17,0-test-different-5_0:20,0-test-different-5_1:11,0-test-different-6_0:20,0-test-different-6_1:12,0-test-same-7_0:20,0-test-same-7_1:16,0-test-different-8_0:17,0-test-different-8_1:11,0-test-same-9_0:19,0-test-same-9_1:17,01-trial-same-10_0:19,01-trial-same-10_1:23,02-trial-different-11_0:22,02-trial-different-11_1:10,03-trial-different-12_0:20,03-trial-different-12_1:12,04-trial-same-13_0:18,04-trial-same-13_1:14,05-trial-same-14_0:17,05-trial-same-14_1:19,06-trial-different-15_0:21,06-trial-different-15_1:10,07-trial-same-16_0:20,07-trial-same-16_1:17,08-trial-different-17_0:20,08-trial-different-17_1:8,09-trial-same-18_0:20,09-trial-same-18_1:10,10-trial-different-19_0:21,10-trial-different-19_1:10,11-trial-same-20_0:19,11-trial-same-20_1:12,12-trial-same-21_0:19,12-trial-same-21_1:16,13-trial-different-22_0:19,13-trial-different-22_1:14,14-trial-different-23_0:22,14-trial-different-23_1:13,15-trial-same-24_0:19,15-trial-same-24_1:12,16-trial-same-25_0:18,16-trial-same-25_1:10,17-trial-different-26_0:20,17-trial-different-26_1:30,18-trial-different-27_0:21,18-trial-different-27_1:20,19-trial-same-28_0:17,19-trial-same-28_1:15,20-trial-different-29_0:17,20-trial-different-29_1:12,21-trial-different-30_0:18,21-trial-different-30_1:11,22-trial-different-31_0:16,22-trial-different-31_1:13,23-trial-same-32_0:19,23-trial-same-32_1:13,24-trial-different-33_0:19,24-trial-different-33_1:11,25-trial-same-34_0:20,25-trial-same-34_1:17,26-trial-same-35_0:22,26-trial-same-35_1:20,27-trial-same-36_0:20,27-trial-same-36_1:14,28-trial-same-37_0:22,28-trial-same-37_1:11,29-trial-different-38_0:19,29-trial-different-38_1:15,30-trial-same-39_0:19,30-trial-same-39_1:14,31-trial-different-40_0:18,31-trial-different-40_1:10,32-trial-different-41_0:18,32-trial-different-41_1:12-0-test-different-0_0:8,0-test-different-0_1:54,0-test-different-1_0:21,0-test-different-1_1:11,0-test-same-2_0:24,0-test-same-2_1:20,0-test-different-3_0:13,0-test-different-3_1:18,0-test-different-4_0:20,0-test-different-4_1:20,0-test-different-5_0:19,0-test-different-5_1:10,0-test-different-6_0:18,0-test-different-6_1:13,0-test-same-7_0:16,0-test-same-7_1:16,0-test-different-8_0:15,0-test-different-8_1:10,0-test-same-9_0:22,0-test-same-9_1:15,01-trial-same-10_0:20,01-trial-same-10_1:23,02-trial-different-11_0:24,02-trial-different-11_1:14,03-trial-different-12_0:18,03-trial-different-12_1:14,04-trial-same-13_0:18,04-trial-same-13_1:12,05-trial-same-14_0:23,05-trial-same-14_1:21,06-trial-different-15_0:21,06-trial-different-15_1:12,07-trial-same-16_0:19,07-trial-same-16_1:16,08-trial-different-17_0:22,08-trial-different-17_1:8,09-trial-same-18_0:21,09-trial-same-18_1:7,10-trial-different-19_0:16,10-trial-different-19_1:10,11-trial-same-20_0:21,11-trial-same-20_1:10,12-trial-same-21_0:18,12-trial-same-21_1:20,13-trial-different-22_0:19,13-trial-different-22_1:13,14-trial-different-23_0:17,14-trial-different-23_1:9,15-trial-same-24_0:19,15-trial-same-24_1:11,16-trial-same-25_0:23,16-trial-same-25_1:11,17-trial-different-26_0:21,17-trial-different-26_1:27,18-trial-different-27_0:20,18-trial-different-27_1:22,19-trial-same-28_0:19,19-trial-same-28_1:19,20-trial-different-29_0:21,20-trial-different-29_1:10,21-trial-different-30_0:19,21-trial-different-30_1:11,22-trial-different-31_0:20,22-trial-different-31_1:10,23-trial-same-32_0:21,23-trial-same-32_1:10,24-trial-different-33_0:19,24-trial-different-33_1:14,25-trial-same-34_0:16,25-trial-same-34_1:16,26-trial-same-35_0:22,26-trial-same-35_1:22,27-trial-same-36_0:23,27-trial-same-36_1:18,28-trial-same-37_0:19,28-trial-same-37_1:12,29-trial-different-38_0:21,29-trial-different-38_1:13,30-trial-same-39_0:22,30-trial-same-39_1:13,31-trial-different-40_0:21,31-trial-different-40_1:11,32-trial-different-41_0:19,32-trial-different-41_1:11";
At string index 2015 there is a - which actually separates this string in two.
It is different from all the other dashes in that it is the ONLY one preceeded by a colon (:) and then 1 to N numbers before there is a dash.
The way I've been finding it is by using this procedure:
var searchingColon = true;
for (var i = 0; i < input.length; i++){
if ((searchingColon) && (input.charAt(i) == ':')){
searchingColon = false;
}
else if (!searchingColon){
if (input.charAt(i) == "-"){
console.log("Found at " + i);
return;
}
if (isNaN(parseInt(input.charAt(i), 10))) {
searchingColon = true;
}
}
}
However it takes a while and I thought a regular expression would be better.
So I've tried this:
regex = "/:[0-9]+-/"
var res = input.search(regex)
console.log(res)
But res is -1, which mean it hasn't found anything. What am I doing wrong?

You should remove the quotes. Regular expressions in JavaScript are enclosed by forward slashes, not quotes.
regex = /:[0-9]+-/;
var res = input.search(regex)
console.log(res)
However, you can create a regex surround by quotes if you use the Regex constructor.
var regex2 = new RegExp(':[0-9]+-');
I usually prefer the first method, because you have to escape the \ and use \\ if you use the string variation.

How to account for punctuation and uppercase while counting the number of occurrences of a string in a sentence

I'm writing a function but cannot figure out how to account for upper case letter and punctuation.
My function is :
function countWords(word, string) {
var subStr = string.split(word);
return subStr.length - 1;
}
And it works when I try to test is with wordCount("hey","this is code hey"), but not if I try ("HEY", "this is code hey")
I tried
var subStr= string.toUpperCase().split(word)
but it will not work with lower case letters anymore.

Why can't you try like this.
function countWords(word, string) {
word= word.toLowerCase();
string=string.toLowerCase();
var subStr = string.split(word);
return subStr.length - 1;
}
So that whatever values we sent it will be converted into lower case then it will split.
Does it makes sense right?

Try this :
function countWords(word, string) {
var subStr = string.toLowerCase().split(word.toLowerCase());
return subStr.length - 1;
}
$(document).ready(function(){
$('#result').html(countWords("HEY","this is code hey"));
});
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js"></script>
<div id="result"></div>

Try sending the parameter to either all upper or lower cases first so that it matches the case of the string you are comparing it to. For example,
function countWords(word.toLowerCase, string.toLowerCase)
That way the search is evaluated regardless of case.

You could use a regex with the i and g modifier (case insensitive/match all) and match, then return the length:
function wordCount(search, txt) {
var regex = new RegExp("\\W" + search + "\\W|\\W" + search, "ig");
var match = txt.match(regex);
return match ? match.length : 0;
}
console.log(wordCount("hey","this is code heyHey HEY hey")); // 2
If you want to have heyHey as 2 matches, simply remove |\\W" + search from the regex

Remove all special characters except space from a string using JavaScript

I want to remove all special characters except space from a string using JavaScript.
For example,
abc's test#s
should output as
abcs tests.

You should use the string replace function, with a single regex.
Assuming by special characters, you mean anything that's not letter, here is a solution:
const str = "abc's test#s";
console.log(str.replace(/[^a-zA-Z ]/g, ""));

You can do it specifying the characters you want to remove:
string = string.replace(/[&\/\\#,+()$~%.'":*?<>{}]/g, '');
Alternatively, to change all characters except numbers and letters, try:
string = string.replace(/[^a-zA-Z0-9]/g, '');

The first solution does not work for any UTF-8 alphabet. (It will cut text such as Привіт). I have managed to create a function which does not use RegExp and use good UTF-8 support in the JavaScript engine. The idea is simple if a symbol is equal in uppercase and lowercase it is a special character. The only exception is made for whitespace.
function removeSpecials(str) {
var lower = str.toLowerCase();
var upper = str.toUpperCase();
var res = "";
for(var i=0; i<lower.length; ++i) {
if(lower[i] != upper[i] || lower[i].trim() === '')
res += str[i];
}
return res;
}
Update: Please note, that this solution works only for languages where there are small and capital letters. In languages like Chinese, this won't work.
Update 2: I came to the original solution when I was working on a fuzzy search. If you also trying to remove special characters to implement search functionality, there is a better approach. Use any transliteration library which will produce you string only from Latin characters and then the simple Regexp will do all magic of removing special characters. (This will work for Chinese also and you also will receive side benefits by making Tromsø == Tromso).

search all not (word characters || space):
str.replace(/[^\w ]/, '')

I don't know JavaScript, but isn't it possible using regex?
Something like [^\w\d\s] will match anything but digits, characters and whitespaces. It would be just a question to find the syntax in JavaScript.

I tried Seagul's very creative solution, but found it treated numbers also as special characters, which did not suit my needs. So here is my (failsafe) tweak of Seagul's solution...
//return true if char is a number
function isNumber (text) {
if(text) {
var reg = new RegExp('[0-9]+$');
return reg.test(text);
}
return false;
}
function removeSpecial (text) {
if(text) {
var lower = text.toLowerCase();
var upper = text.toUpperCase();
var result = "";
for(var i=0; i<lower.length; ++i) {
if(isNumber(text[i]) || (lower[i] != upper[i]) || (lower[i].trim() === '')) {
result += text[i];
}
}
return result;
}
return '';
}

const str = "abc's#thy#^g&test#s";
console.log(str.replace(/[^a-zA-Z ]/g, ""));

Try to use this one
var result= stringToReplace.replace(/[^\w\s]/g, '')
[^] is for negation, \w for [a-zA-Z0-9_] word characters and \s for space,
/[]/g for global

With regular expression
let string = "!#This tool removes $special *characters* /other/ than! digits, characters and spaces!!!$";
var NewString= string.replace(/[^\w\s]/gi, '');
console.log(NewString);
Result //This tool removes special characters other than digits characters and spaces
Live Example : https://helpseotools.com/text-tools/remove-special-characters

dot (.) may not be considered special. I have added an OR condition to Mozfet's & Seagull's answer:
function isNumber (text) {
reg = new RegExp('[0-9]+$');
if(text) {
return reg.test(text);
}
return false;
}
function removeSpecial (text) {
if(text) {
var lower = text.toLowerCase();
var upper = text.toUpperCase();
var result = "";
for(var i=0; i<lower.length; ++i) {
if(isNumber(text[i]) || (lower[i] != upper[i]) || (lower[i].trim() === '') || (lower[i].trim() === '.')) {
result += text[i];
}
}
return result;
}
return '';
}

Try this:
const strippedString = htmlString.replace(/(<([^>]+)>)/gi, "");
console.log(strippedString);

const input = `#if_1 $(PR_CONTRACT_END_DATE) == '23-09-2019' #
Test27919<alerts#imimobile.com> #elseif_1 $(PR_CONTRACT_START_DATE) == '20-09-2019' #
Sender539<rama.sns#gmail.com> #elseif_1 $(PR_ACCOUNT_ID) == '1234' #
AdestraSID<hello#imimobile.co> #else_1#Test27919<alerts#imimobile.com>#endif_1#`;
const replaceString = input.split('$(').join('->').split(')').join('<-');
console.log(replaceString.match(/(?<=->).*?(?=<-)/g));

Whose special characters you want to remove from a string, prepare a list of them and then user javascript replace function to remove all special characters.
var str = 'abc'de#;:sfjkewr47239847duifyh';
alert(str.replace("'","").replace("#","").replace(";","").replace(":",""));
or you can run loop for a whole string and compare single single character with the ASCII code and regenerate a new string.

Regex, grab only one instance of each letter

I have a paragraph that's broken up into an array, split at the periods. I'd like to perform a regex on index[i], replacing it's contents with one instance of each letter that index[i]'s string value has.
So; index[i]:"This is a sentence" would return --> index[i]:"thisaenc"
I read this thread. But i'm not sure if that's what i'm looking for.

Not sure how to do this in regex, but here's a very simple function to do it without using regex:
function charsInString(input) {
var output='';
for(var pos=0; pos<input.length; pos++) {
char=input.charAt(pos).toLowerCase();
if(output.indexOf(char) == -1 && char != ' ') {output+=char;}
}
return output;
}
alert(charsInString('This is a sentence'));

As I'm pretty sure what you need cannot be achieved using a single regular expression, I offer a more general solution:
// collapseSentences(ary) will collapse each sentence in ary
// into a string containing its constituent chars
// #param {Array} the array of strings to collapse
// #return {Array} the collapsed sentences
function collapseSentences(ary){
var result=[];
ary.forEach(function(line){
var tmp={};
line.toLowerCase().split('').forEach(function(c){
if(c >= 'a' && c <= 'z') {
tmp[c]++;
}
});
result.push(Object.keys(tmp).join(''));
});
return result;
}
which should do what you want except that the order of characters in each sentence cannot be guaranteed to be preserved, though in most cases it is.
Given:
var index=['This is a sentence','This is a test','this is another test'],
result=collapseSentences(index);
result contains:
["thisaenc","thisae", "thisanoer"]

(\w)(?<!.*?\1)
This yields a match for each of the right characters, but as if you were reading right-to-left instead.
This finds a word character, then looks ahead for the character just matched.

Nevermind, i managed:
justC = "";
if (color[i+1].match(/A/g)) {justC += " L_A";}
if (color[i+1].match(/B/g)) {justC += " L_B";}
if (color[i+1].match(/C/g)) {justC += " L_C";}
if (color[i+1].match(/D/g)) {justC += " L_D";}
if (color[i+1].match(/E/g)) {justC += " L_E";}
else {color[i+1] = "L_F";}
It's not exactly what my question may have lead to belive is what i wanted, but the printout for this is what i was after, for use in a class: <span class="L_A L_C L_E"></span>

How about:
var re = /(.)((.*?)\1)/g;
var str = 'This is a sentence';
x = str.toLowerCase();
x = x.replace(/ /g, '');
while(x.match(re)) {
x=x.replace(re, '$1$3');
}

I don't think this can be done in one fell regex swoop. You are going to need to use a loop.
While my example was not written in your language of choice, it doesn't seem to use any regex features not present in javascript.
perl -e '$foo="This is a sentence"; while ($foo =~ s/((.).*?)\2/$1/ig) { print "<$1><$2><$foo>\n"; } print "$foo\n";'
Producing:
This aenc

Javascript regex - split string

Struggling with a regex requirement. I need to split a string into an array wherever it finds a forward slash. But not if the forward slash is preceded by an escape.
Eg, if I have this string:
hello/world
I would like it to be split into an array like so:
arrayName[0] = hello
arrayName[1] = world
And if I have this string:
hello/wo\/rld
I would like it to be split into an array like so:
arrayName[0] = hello
arrayName[1] = wo/rld
Any ideas?

I wouldn't use split() for this job. It's much easier to match the path components themselves, rather than the delimiters. For example:
var subject = 'hello/wo\\/rld';
var regex = /(?:[^\/\\]+|\\.)+/g;
var matched = null;
while (matched = regex.exec(subject)) {
print(matched[0]);
}
output:
hello
wo\/rld
test it at ideone.com

The following is a little long-winded but will work, and avoids the problem with IE's broken split implementation by not using a regular expression.
function splitPath(str) {
var rawParts = str.split("/"), parts = [];
for (var i = 0, len = rawParts.length, part; i < len; ++i) {
part = "";
while (rawParts[i].slice(-1) == "\\") {
part += rawParts[i++].slice(0, -1) + "/";
}
parts.push(part + rawParts[i]);
}
return parts;
}
var str = "hello/world\\/foo/bar";
alert( splitPath(str).join(",") );

Here's a way adapted from the techniques in this blog post:
var str = "Testing/one\\/two\\/three";
var result = str.replace(/(\\)?\//g, function($0, $1){
return $1 ? '/' : '[****]';
}).split('[****]');
Live example
Given:
Testing/one\/two\/three
The result is:
[0]: Testing
[1]: one/two/three
That first uses the simple "fake" lookbehind to replace / with [****] and to replace \/ with /, then splits on the [****] value. (Obviously, replace [****] with anything that won't be in the string.)

/*
If you are getting your string from an ajax response or a data base query,
that is, the string has not been interpreted by javascript,
you can match character sequences that either have no slash or have escaped slashes.
If you are defining the string in a script, escape the escapes and strip them after the match.
*/
var s='hello/wor\\/ld';
s=s.match(/(([^\/]*(\\\/)+)([^\/]*)+|([^\/]+))/g) || [s];
alert(s.join('\n'))
s.join('\n').replace(/\\/g,'')
/* returned value: (String)
hello
wor/ld
*/

Here's an example at rubular.com

For short code, you can use reverse to simulate negative lookbehind
function reverse(s){
return s.split('').reverse().join('');
}
var parts = reverse(myString).split(/[/](?!\\(?:\\\\)*(?:[^\\]|$))/g).reverse();
for (var i = parts.length; --i >= 0;) { parts[i] = reverse(parts[i]); }
but to be efficient, it's probably better to split on /[/]/ and then walk the array and rejoin elements that have an escape at the end.

Something like this may take care of it for you.
var str = "/hello/wo\\/rld/";
var split = str.replace(/^\/|\\?\/|\/$/g, function(match) {
if (match.indexOf('\\') == -1) {
return '\x00';
}
return match;
}).split('\x00');
alert(split);

We Keep Coding

JavaScript is the programming language of the Web.

Javascript replace utf characters in string - javascript

Try this: function clearText(inp) { var wrong = 'čćšđž'; var right = 'ccsdz'; var re = new RegExp('[' + wrong + ']', 'ig'); return inp.replace(re, function (m) { return right.charAt(wrong.indexOf(m)); }); }

Related

Correctly writing a javascript regex to split a stirng

How to account for punctuation and uppercase while counting the number of occurrences of a string in a sentence

Remove all special characters except space from a string using JavaScript

Regex, grab only one instance of each letter

Javascript regex - split string

Categories

Resources