Intl.NumberFormat() constructor with phone numbers [duplicate] - javascript

I'm looking to reformat (replace, not validate - there are many references for validating) a phone number for display in Javascript. Here's an example of some of the data:
123 4567890
(123) 456-7890
(123)456-7890
123 456 7890
123.456.7890
(blank/null)
1234567890
Is there an easy way to use a regular expression to do this? I'm looking for the best way to do this. Is there a better way?
I want to reformat the number to the following: (123) 456-7890

Assuming you want the format "(123) 456-7890":
function formatPhoneNumber(phoneNumberString) {
var cleaned = ('' + phoneNumberString).replace(/\D/g, '');
var match = cleaned.match(/^(\d{3})(\d{3})(\d{4})$/);
if (match) {
return '(' + match[1] + ') ' + match[2] + '-' + match[3];
}
return null;
}
Here's a version that allows the optional +1 international code:
function formatPhoneNumber(phoneNumberString) {
var cleaned = ('' + phoneNumberString).replace(/\D/g, '');
var match = cleaned.match(/^(1|)?(\d{3})(\d{3})(\d{4})$/);
if (match) {
var intlCode = (match[1] ? '+1 ' : '');
return [intlCode, '(', match[2], ') ', match[3], '-', match[4]].join('');
}
return null;
}
formatPhoneNumber('+12345678900') // => "+1 (234) 567-8900"
formatPhoneNumber('2345678900') // => "(234) 567-8900"

Possible solution:
function normalize(phone) {
//normalize string and remove all unnecessary characters
phone = phone.replace(/[^\d]/g, "");
//check if number length equals to 10
if (phone.length == 10) {
//reformat and return phone number
return phone.replace(/(\d{3})(\d{3})(\d{4})/, "($1) $2-$3");
}
return null;
}
var phone = '(123)4567890';
phone = normalize(phone); //(123) 456-7890

var x = '301.474.4062';
x = x.replace(/\D+/g, '')
.replace(/(\d{3})(\d{3})(\d{4})/, '($1) $2-$3');
alert(x);

This answer borrows from maerics' answer. It differs primarily in that it accepts partially entered phone numbers and formats the parts that have been entered.
phone = value.replace(/\D/g, '');
const match = phone.match(/^(\d{1,3})(\d{0,3})(\d{0,4})$/);
if (match) {
phone = `${match[1]}${match[2] ? ' ' : ''}${match[2]}${match[3] ? '-' : ''}${match[3]}`;
}
return phone

I'm using this function to format US numbers.
function formatUsPhone(phone) {
var phoneTest = new RegExp(/^((\+1)|1)? ?\(?(\d{3})\)?[ .-]?(\d{3})[ .-]?(\d{4})( ?(ext\.? ?|x)(\d*))?$/);
phone = phone.trim();
var results = phoneTest.exec(phone);
if (results !== null && results.length > 8) {
return "(" + results[3] + ") " + results[4] + "-" + results[5] + (typeof results[8] !== "undefined" ? " x" + results[8] : "");
}
else {
return phone;
}
}
It accepts almost all imaginable ways of writing a US phone number. The result is formatted to a standard form of (987) 654-3210 x123

thinking backwards
Take the last digits only (up to 10) ignoring first "1".
function formatUSNumber(entry = '') {
const match = entry
.replace(/\D+/g, '').replace(/^1/, '')
.match(/([^\d]*\d[^\d]*){1,10}$/)[0]
const part1 = match.length > 2 ? `(${match.substring(0,3)})` : match
const part2 = match.length > 3 ? ` ${match.substring(3, 6)}` : ''
const part3 = match.length > 6 ? `-${match.substring(6, 10)}` : ''
return `${part1}${part2}${part3}`
}
example input / output as you type
formatUSNumber('+1333')
// (333)
formatUSNumber('333')
// (333)
formatUSNumber('333444')
// (333) 444
formatUSNumber('3334445555')
// (333) 444-5555

2021
libphonenumber-js
Example
import parsePhoneNumber from 'libphonenumber-js'
const phoneNumber = parsePhoneNumber('+12133734253')
phoneNumber.formatInternational() === '+1 213 373 4253'
phoneNumber.formatNational() === '(213) 373-4253'
phoneNumber.getURI() === 'tel:+12133734253'

Based on David Baucum's answer - here is a version that trys to improve auto-replacement "as you type" for example in a React onChange event handler:
function formatPhoneNumber(phoneNumber) {
const cleanNum = phoneNumber.toString().replace(/\D/g, '');
const match = cleanNum.match(/^(\d{3})(\d{0,3})(\d{0,4})$/);
if (match) {
return '(' + match[1] + ') ' + (match[2] ? match[2] + "-" : "") + match[3];
}
return cleanNum;
}
//...
onChange={e => setPhoneNum(formatPhoneNumber(e.target.value))}
It will insert (###) as soon as there are 3 numbers and then it will keep following the RegEx until it looks like this (###) ###-####

I've extended David Baucum's answer to include support for extensions up to 4 digits in length. It also includes the parentheses requested in the original question. This formatting will work as you type in the field.
phone = phone.replace(/\D/g, '');
const match = phone.match(/^(\d{1,3})(\d{0,3})(\d{0,4})(\d{0,4})$/);
if (match) {
phone = `(${match[1]}${match[2] ? ') ' : ''}${match[2]}${match[3] ? '-' : ''}${match[3]}${match[4] ? ' x' : ''}${match[4]}`;
}
return phone;

Almost all of these have issues when the user tries to backspace over the delimiters, particularly from the middle of the string.
Here's a jquery solution that handles that, and also makes sure the cursor stays in the right place as you edit:
//format text input as phone number (nnn) nnn-nnnn
$('.myPhoneField').on('input', function (e){
var $phoneField = e.target;
var cursorPosition = $phoneField.selectionStart;
var numericString = $phoneField.value.replace(/\D/g, '').substring(0, 10);
// let user backspace over the '-'
if (cursorPosition === 9 && numericString.length > 6) return;
// let user backspace over the ') '
if (cursorPosition === 5 && numericString.length > 3) return;
if (cursorPosition === 4 && numericString.length > 3) return;
var match = numericString.match(/^(\d{1,3})(\d{0,3})(\d{0,4})$/);
if (match) {
var newVal = '(' + match[1];
newVal += match[2] ? ') ' + match[2] : '';
newVal += match[3] ? '-' + match[3] : '';
// to help us put the cursor back in the right place
var delta = newVal.length - Math.min($phoneField.value.length, 14);
$phoneField.value = newVal;
$phoneField.selectionEnd = cursorPosition + delta;
} else {
$phoneField.value = '';
}
})

var numbers = "(123) 456-7890".replace(/[^\d]/g, ""); //This strips all characters that aren't digits
if (numbers.length != 10) //wrong format
//handle error
var phone = "(" + numbers.substr(0, 3) + ") " + numbers.substr(3, 3) + "-" + numbers.substr(6); //Create format with substrings

Here is one that will accept both phone numbers and phone numbers with extensions.
function phoneNumber(tel) {
var toString = String(tel),
phoneNumber = toString.replace(/[^0-9]/g, ""),
countArrayStr = phoneNumber.split(""),
numberVar = countArrayStr.length,
closeStr = countArrayStr.join("");
if (numberVar == 10) {
var phone = closeStr.replace(/(\d{3})(\d{3})(\d{4})/, "$1.$2.$3"); // Change number symbols here for numbers 10 digits in length. Just change the periods to what ever is needed.
} else if (numberVar > 10) {
var howMany = closeStr.length,
subtract = (10 - howMany),
phoneBeginning = closeStr.slice(0, subtract),
phoneExtention = closeStr.slice(subtract),
disX = "x", // Change the extension symbol here
phoneBeginningReplace = phoneBeginning.replace(/(\d{3})(\d{3})(\d{4})/, "$1.$2.$3"), // Change number symbols here for numbers greater than 10 digits in length. Just change the periods and to what ever is needed.
array = [phoneBeginningReplace, disX, phoneExtention],
afterarray = array.splice(1, 0, " "),
phone = array.join("");
} else {
var phone = "invalid number US number";
}
return phone;
}
phoneNumber("1234567891"); // Your phone number here

For all international Phone numbers with country code upto 3 digits, we can change the original answer a little bit as below.
For first match instead of looking for '1' we should look for 1-3 digits.
export const formatPhoneNumber = (phoneNumberString) => {
var cleaned = ('' + phoneNumberString).replace(/\D/g, '');
var match = cleaned.match(/^(\d{1,3}|)?(\d{3})(\d{3})(\d{4})$/);
if (match) {
var intlCode = (match[1] ? `+${match[1]} ` : '');
return [intlCode, '(', match[2], ') ', match[3], '-', match[4]].join('');
}
return null;
}
console.log( formatPhoneNumber('16464765278') )//+1 (646) 476-5278
console.log( formatPhoneNumber('+2549114765278')) //+254 (911) 476-5278
console.log( formatPhoneNumber('929876543210') )//+92 (987) 654-3210
Fulfils my requirement.

For US Phone Numbers
/^\(?(\d{3})\)?[- ]?(\d{3})[- ]?(\d{4})$/
Let’s divide this regular expression in smaller fragments to make is easy to understand.
/^\(?: Means that the phone number may begin with an optional (.
(\d{3}): After the optional ( there must be 3 numeric digits. If the phone number does not have a (, it must start with 3 digits. E.g. (308 or 308.
\)?: Means that the phone number can have an optional ) after first 3 digits.
[- ]?: Next the phone number can have an optional hyphen (-) after ) if present or after first 3 digits.
(\d{3}): Then there must be 3 more numeric digits. E.g (308)-135 or 308-135 or 308135
[- ]?: After the second set of 3 digits the phone number can have another optional hyphen (-). E.g (308)-135- or 308-135- or 308135-
(\d{4})$/: Finally, the phone number must end with four digits. E.g (308)-135-7895 or 308-135-7895 or 308135-7895 or 3081357895.
Reference :
http://www.zparacha.com/phone_number_regex/

You can use this functions to check valid phone numbers and normalize them:
let formatPhone = (dirtyNumber) => {
return dirtyNumber.replace(/\D+/g, '').replace(/(\d{3})(\d{3})(\d{4})/, '($1) $2-$3');
}
let isPhone = (phone) => {
//normalize string and remove all unnecessary characters
phone = phone.replace(/\D+/g, '');
return phone.length == 10? true : false;
}

The solutions above are superior, especially if using Java, and encountering more numbers with more than 10 digits such as the international code prefix or additional extension numbers. This solution is basic (I'm a beginner in the regex world) and designed with US Phone numbers in mind and is only useful for strings with just 10 numbers with perhaps some formatting characters, or perhaps no formatting characters at all (just 10 numbers). As such I would recomend this solution only for semi-automatic applications. I Personally prefer to store numbers as just 10 numbers without formatting characters, but also want to be able to convert or clean phone numbers to the standard format normal people and apps/phones will recognize instantly at will.
I came across this post looking for something I could use with a text cleaner app that has PCRE Regex capabilities (but no java functions). I will post this here for people who could use a simple pure Regex solution that could work in a variety of text editors, cleaners, expanders, or even some clipboard managers. I personally use Sublime and TextSoap. This solution was made for Text Soap as it lives in the menu bar and provides a drop-down menu where you can trigger text manipulation actions on what is selected by the cursor or what's in the clipboard.
My approach is essentially two substitution/search and replace regexes. Each substitution search and replace involves two regexes, one for search and one for replace.
Substitution/ Search & Replace #1
The first substitution/ search & replace strips non-numeric numbers from an otherwise 10-digit number to a 10-digit string.
First Substitution/ Search Regex: \D
This search string matches all characters that is not a digit.
First Substitution/ Replace Regex: "" (nothing, not even a space)
Leave the substitute field completely blank, no white space should exist including spaces. This will result in all matched non-digit characters being deleted. You should have gone in with 10 digits + formatting characters prior this operation and come out with 10 digits sans formatting characters.
Substitution/ Search & Replace #2
The second substitution/search and replace search part of the operation captures groups for area code $1, a capture group for the second set of three numbers $2, and the last capture group for the last set of four numbers $3. The regex for the substitute portion of the operation inserts US phone number formatting in between the captured group of digits.
Second Substitution/ Search Regex: (\d{3})(\d{3})(\d{4})
Second Substitution/ Replace Regex: \($1\) $2\-$3
The backslash \ escapes the special characters (, ) , (<-whitespace), and - since we are inserting them between our captured numbers in capture groups $1, $2, & $3 for US phone number formatting purposes.
In TextSoap I created a custom cleaner that includes the two substitution operation actions, so in practice it feels identical to executing a script. I'm sure this solution could be improved but I expect complexity to go up quite a bit. An improved version of this solution is welcomed as a learning experience if anyone wants to add to this.

Related

Dynamically split string using regex

I am receiving an Australian phone number from the user as a text input. The string will be 10 characters long and begin with 04. I want to split the string as the user is entering it so it turns out like 0411 111 111.
My current solution is value.toString().replace(/^(04\d{2})(\d{3})(\d{3})$/, $1 $2 $3)
This solution splits the string correctly, but only when the user has entered the entire 10 characters. I want it to start splitting after the first 4 characters have been entered ie 0411 1 etc.
Here is a one liner which will work for your exact use case:
var results = "0411111111".split(/(?=\d{6}$)|(?=\d{3}$)/);
console.log(results);
We may split your string on a regex which targets the point after 4 digits and the point after 7 digits.
Consider something like below that checks the length of the currently input mobile number and then applies a different regex depending on the length:
var mobileInput = document.getElementById('mobile');
mobileInput.addEventListener('keyup', foo);
function foo() {
var unformatted = mobileInput.value;
var pattern, replacer;
if (unformatted.length < 5) {
pattern = /(04\d{2})/;
replacer = '$1 ';
} else if (unformatted.length < 9) {
pattern = /(04\d{2})\s{1}(\d{3})/;
replacer = '$1 $2 ';
} else {
pattern = /^(04\d{2})(\d{3})(\d{3})$/;
replacer = '$1 $2 $3';
}
var formatted = unformatted.replace(pattern, replacer);
mobileInput.value = formatted;
}
<input type="text" id="mobile" />
I have managed to come up with a bit of a solution. It is not exactly what I was aiming for, but it does the job.
value.toString()
.replace(/^(04\d{2})(\d{3})(\d{3})$/, $1 $2 $3)
.replace(/[\s-]+$/, "")
This strips out the white space on each keypress (each time the regex is called) and reformats it.
here is my solution:
remove spaces added by before (to recover phone numbers from being splitted)
try to match input using a regex
combine matches and handle some other situations
code:
document.getElementById("phone").addEventListener("input", function() {
var matches = this.value.replace(/ /g, "").match(/^(04\d{2})(\d{3})?(\d{3})?(\d*?)$/);
this.value = matches && matches.length > 2 ?
matches.slice(1, matches.length - 1).join(" ")
+ (matches[matches.length - 1] || "")
: this.value;
});
<input id="phone" maxlength="12">
I'd probably do something like this:
let phone = document.getElementById('phone');
phone.addEventListener('keyup', evt => {
// get value, removing anything that isn't a number
let text = phone.value.replace(/\D/g, '');
// turn it into an array
text = text.split('');
// create a new array containing each group of digits, separated by spaces
let out = [...text.slice(0, 4), ' ', ...text.slice(4, 7), ' ', ...text.slice(7, 10)];
// turn it back into a string, remove any trailing spaces
phone.value = out.join('').trim();
}, false);
<input id="phone">

How to compress IPV6 address using javascript?

I have seen the code to compress IPV6 in java.
The link specifies the same.
Below is the code in Java . String resultString = subjectString.replaceAll("((?::0\\b){2,}):?(?!\\S*\\b\\1:0\\b)(\\S*)", "::$2");
But in Javascript I am confused as how can I get the regex expression to match the same . Can you share some pointers here?
Example : fe80:00:00:00:8e3:a11a:2a49:1148
Result : fe80::8e3:a11a:2a49:1148
There's a couple problems with the other answer by #ClasG:
If the repeating zeroes are at the beginning of the IPv6 address or it's all zeroes, only 1 colon is replaced.
If the repeating zeroes are at the end, they're not replaced.
I suggest using the regex \b:?(?:0+:?){2,} and have it replaced with :: (two colons)
Regex101 tests
JavaScript example:
var ips = [
'2001:0db8:ac10:0000:0000:0000:0000:ffff',
'2001:0db8:ac10:0000:0000:0000:0000:0000',
'0:0:0:0:0:2001:0db8:ac10',
'2001:0db8:ac10:aaaa:0000:bbbb:cccc:ffff',
'2001:0db8:ac10:0000:0000:bbbb:00:00'
];
for (var i = 0; i < ips.length; i++) {
document.write(ips[i].replace(/\b:?(?:0+:?){2,}/, '::') + "<br>");
}
Note: The Regex101 tests replace multiple repeating groups of zeroes. In XYZ programming language, you'll have to limit the number of replacements to 1. In JavaScript, you omit the global flag. In PHP, you set the $limit for preg_replace to 1.
You can do it by replacing
\b(?:0+:){2,}
with
:
function compIPV6(input) {
return input.replace(/\b(?:0+:){2,}/, ':');
}
document.write(compIPV6('2001:db8:0:0:0:0:2:1') + '<br/>');
document.write(compIPV6('fe80:00:00:00:8e3:a11a:2a49:1148' + '<br/>'));
Check it out at regex101.
You can use this method in order to compress IPv6 AND remove leading 0s:
function compressIPV6(input) {
var formatted = input.replace(/\b(?:0+:){2,}/, ':');
var finalAddress = formatted.split(':')
.map(function(octet) {
return octet.replace(/\b0+/g, '');
}).join(':');
return finalAddress;
}
document.write(compressIPV6('2001:0db8:0000:0000:0000:0000:1428:57ab') );
You can use a function that considers all of the needed cases:
const compressIPV6 = (ip) => {
//First remove the leading 0s of the octets. If it's '0000', replace with '0'
let output = ip.split(':').map(terms => terms.replace(/\b0+/g, '') || '0').join(":");
//Then search for all occurrences of continuous '0' octets
let zeros = [...output.matchAll(/\b:?(?:0+:?){2,}/g)];
//If there are occurences, see which is the longest one and replace it with '::'
if (zeros.length > 0) {
let max = '';
zeros.forEach(item => {
if (item[0].replaceAll(':', '').length > max.replaceAll(':', '').length) {
max = item[0];
}
})
output = output.replace(max, '::');
}
return output;
}
document.write(compressIPV6('38c1:3db8:0000:0000:0000:0000:0043:000a') + '<br/>');
document.write(compressIPV6('0000:0000:0000:0000:38c1:3db8:0043:000a') + '<br/>');
document.write(compressIPV6('38c1:3db8:0000:0043:000a:0000:0000:0000') + '<br/>');
document.write(compressIPV6('38c1:0000:0000:3db8:0000:0000:0000:12ab') + '<br/>');
If there's more than one occurrence of consecutive '0' octets of the same length, it will only replace the first one. This will work regardless if the repeating zeroes are at the beginning, at the middle or at the end.

REGEX - Input must have x unique digits

I was wondering if anyone knows a way for regex to detect that there has been a minimum of x digits used.
For example if I put in a number 6 digit number of 111222 but my regex says there must be at least 3 unique numbers this would cause a valid fail.
but if I had 123456 that would pass because there is more than three unique digits used.
Something like (obviously it wont be like bellow.)
/^[0-9]{6}*3$/
non regex way
var telcstart = $('#num').val(),
telc1 = (telcstart.match(/1/g) || []).length,
telc2 = (telcstart.match(/2/g) || []).length,
telc3 = (telcstart.match(/3/g) || []).length,
telc4 = (telcstart.match(/4/g) || []).length,
telc5 = (telcstart.match(/5/g) || []).length,
telc6 = (telcstart.match(/6/g) || []).length,
telc7 = (telcstart.match(/7/g) || []).length,
telc8 = (telcstart.match(/8/g) || []).length,
telc9 = (telcstart.match(/9/g) || []).length,
telc0 = (telcstart.match(/0/g) || []).length,
totaltelc = "a:" + telc1 + " b:" + telc2 + " c:" + telc3 + " d:" + telc4 + "e:" + telc5 + " f:" + telc6 + " g:" + telc7 + " h:" + telc8 + " i:" + telc9 + " j:" + telc0,
finaltelc = (totaltelc.match(/0/g) || []).length;
if (finaltelc <= 8) {
alert('passed');
} else {
alert('failed');
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>
<input type="number" id="num" value="123451212111">
^(?=.*(.)(?!$|.*\1))(?=.*(?!\1)(.)(?!$|.*\2))[0-9]+$
function check(value) {
var res = document.getElementById('result');
if (/^(?=.*(.)(?!$|.*\1))(?=.*(?!\1)(.)(?!$|.*\2))[0-9]+$/.test(value)) {
res.innerText = '✓ OK';
} else {
res.innerText = '✗ No match';
}
}
<p>Enter a number with ≥3 different digits</p>
<p><input type=text onkeyup='check(this.value)'> <span id=result></span></p>
Let's split it up:
^
(?=
.*(.) # Pick any character in the string as group 1
(?!$|.*\1) # Ensure this is not the last character, but is the last unique character
)
(?=
.*(?!\1)(.) # Pick another character in the string as group 2
(?!$|.*\2) # Same condition as above
)
[0-9]+
$
The first condition ensures there is a string like ??1??a. The second condition ensures a string like ???2?b, where 1 ≠ a and 2 ≠ b and 1 ≠ 2. From this we can conclude there are at least 3 different characters.
This can be easily generalized to e.g. at least 8 different characters needed:
^
(?=.*(.)(?!$|.*\1))
(?=.*(?!\1)(.)(?!$|.*\2))
(?=.*(?!\1|\2)(.)(?!$|.*\3))
(?=.*(?!\1|\2|\3)(.)(?!$|.*\4))
(?=.*(?!\1|\2|\3|\4)(.)(?!$|.*\5))
(?=.*(?!\1|\2|\3|\4|\5)(.)(?!$|.*\6))
(?=.*(?!\1|\2|\3|\4|\5|\6)(.)(?!$|.*\7))
[0-9]+
$
Because JavaScript only support up to \9, you can't use this method to check more than 10 different characters. At this point you should really question whether regex is a right tool for this job though 😉.
You actually can do this with a regex, but it'll be very ugly and near-incomprehensible.
Looking at just one case, we can write a regex for a string that contains 0, 1, and at least one number from 2 to 9:
/^0(\d)*1(\d)*[2-9](\d)*$/
For each possible pair of numbers, we would have to write a regex like this ad combine all of them using | so that we catch all cases.
There are 10*9 = 90 pairs of distinct digits, and for each group there are 2 permutations, totalling 180 groups.
So we would have to do:
/(^0(\d)*1(\d)*[2-9](\d)*$/)|(^0(\d)*2(\d)*(1|[3-9])(\d)*$/)| ... /
Continuing for all 180 groups. That would be a gigantic regex, and would probably take a very long while to compile.
You should do this validation with code instead of running a regex.
EDIT: Apparently, JavaScript regexes have some extended features which make this doable, namely reusing the value captured by a given group. refer to #kennytm's answerr.
Based on negative lookaheads you can ensure a different digit matches three times:
(\d)\d*?((?!\1)\d)\d*?(?!\1|\2)\d+$
RegEx Demo
(?!\1) negative lookahead to ensure next match is not same as captured group #1
(?!\1|\2) negative lookahead to ensure next match is not same as captured group #1 and group #2
function uniqueDigit(str)
{
var a = 0;
for (var i=0; i<10; i++)
{
(new RegExp( i, 'g')).test(str) && a++;
}
return a
}
console.log(uniqueDigit('10122211100')) // in str
console.log(uniqueDigit(222111333888)) // in int
var a = 123456;
console.log(/^\d{6}$/.test(a) && uniqueDigit(a) > 2) // example

Count number of characters present in foreign language

Is there any optimal way to implement character count for non English letters? For example, if we take the word "Mother" in English, it is a 6 letter word. But if you type the same word(மதர்) in Tamil, it is a three letter word(ம+த+ர்) but the last letter(ர்) will be considered as two characters(ர+ஂ=ர்) by the system. So is there any way to count the number of real characters?
One clue is that if we move the cursor in keyboard into the word (மதர்), it will pass through 3 letters only and not into 4 chars considering by the system, so is there any way to find the solution by using this? Any help on this would be greatly appreciated...
Update
Back from lunch =)
I'm afraid that the previous won't work this well with any foreign language
So i added another fiddle with a possible way
var UnicodeNsm = [Array 1280] //It holds all escaped Unicode Non Space Marks
function countNSMString(str) {
var chars = str.split("");
var count = 0;
for (var i = 0,ilen = chars.length;i<ilen;i++) {
if(UnicodeNsm.indexOf(escape(chars[i])) == -1) {
count++;
}
}
return count;
}
var English = "Mother";
var Tamil = "மதர்";
var Vietnamese = "mẹ"
var Hindi = "मां"
function logL (str) {
console.log(str + " has " + countNSMString(str) + " visible Characters and " + str.length + " normal Characters" ); //"மதர் has 3 visible Characters"
}
logL(English) //"Mother has 6 visible Characters and 6 normal Characters"
logL(Tamil) //"மதர் has 3 visible Characters and 4 normal Characters"
logL(Vietnamese) //"mẹ has 2 visible Characters and 3 normal Characters"
logL(Hindi) //"मां has 1 visible Characters and 3 normal Characters"
So this just checks if theres any Character in the String which is a Unicode NSM character and ignores the count for this, this should work for the Most languages, not Tamil only,
And an array with 1280 Elements shouldn't be that big of a performance issue
Here is a list with the Unicode NSM's
http://www.fileformat.info/info/unicode/category/Mn/list.htm
Here is the according JSBin
After experimenting a bit with string operations, it turns out
String.indexOf returns the same for
"ர்" and for "ர"
meaning
"ர்ரர".indexOf("ர்") == "ர்ரர".indexOf("ர" + "்") //true but
"ர்ரர".indexOf("ர") == "ர்ரர".indexOf("ர" + "ர") //false
I took this opportunity and tried something like this
//ர்
var char = "ரர்ர்ரர்்";
var char2 = "ரரர்ர்ரர்்";
var char3 = "ர்ரர்ர்ரர்்";
function countStr(str) {
var chars = str.split("");
var count = 0;
for(var i = 0, ilen = chars.length;i<ilen;i++) {
var chars2 = chars[i] + chars[i+1];
if (str.indexOf(chars[i]) == str.indexOf(chars2))
i += 1;
count++;
}
return count;
}
console.log("--");
console.log(countStr(char)); //6
console.log(countStr(char2)); //7
console.log(countStr(char3)); //7
Which seems to work for the String above, it may take some adjustments, as i don't know a thing about Encoding and stuff, but maybe its a point you can begin with
Heres the JSBin
You can ignore combining marks in the count calculation with this function:
function charCount( str ) {
var re = /[\u0300-\u036f\u1dc0-\u1dff\u20d0-\u20ff\ufe20-\ufe2f\u0b82\u0b83\u0bbe\u0bbf\u0bc0-\u0bc2\u0bc6-\u0bc8\u0bca-\u0bcd\u0bd7]/g
return str.replace( re, "").length;
}
console.log(charCount('மதர்'))// 3
//More tests on random Tamil text:
//Paint the text character by character to verify, for instance 'யெ' is a single character, not 2
console.log(charCount("மெய்யெழுத்துக்கள்")); //9
console.log(charCount("ஒவ்வொன்றுடனும்")); //8
console.log(charCount("தமிழ்")); //3
console.log(charCount("வருகின்றனர்.")); //8
console.log(charCount("எழுதப்படும்")); //7
The Tamil signs and marks are not composed into single characters with their target character in unicode, so normalization wouldn't help. I have added all the Tamil combining marks or signs manually
to the regex, but it also includes the ranges for normal combining marks, so charCount("ä") is 1 regardless of normalization form.

Regular Expression for formatting numbers in JavaScript

I need to display a formatted number on a web page using JavaScript. I want to format it so that there are commas in the right places. How would I do this with a regular expression? I've gotten as far as something like this:
myString = myString.replace(/^(\d{3})*$/g, "${1},");
...and then realized this would be more complex than I think (and the regex above is not even close to what I need). I've done some searching and I'm having a hard time finding something that works for this.
Basically, I want these results:
45 becomes 45
3856 becomes 3,856
398868483992 becomes 398,868,483,992
...you get the idea.
This can be done in a single regex, no iteration required. If your browser supports ECMAScript 2018, you could simply use lookaround and just insert commas at the right places:
Search for (?<=\d)(?=(\d\d\d)+(?!\d)) and replace all with ,
In older versions, JavaScript doesn't support lookbehind, so that doesn't work. Fortunately, we only need to change a little bit:
Search for (\d)(?=(\d\d\d)+(?!\d)) and replace all with \1,
So, in JavaScript, that would look like:
result = subject.replace(/(\d)(?=(\d\d\d)+(?!\d))/g, "$1,");
Explanation: Assert that from the current position in the string onwards, it is possible to match digits in multiples of three, and that there is a digit left of the current position.
This will also work with decimals (123456.78) as long as there aren't too many digits "to the right of the dot" (otherwise you get 123,456.789,012).
You can also define it in a Number prototype, as follows:
Number.prototype.format = function(){
return this.toString().replace(/(\d)(?=(\d{3})+(?!\d))/g, "$1,");
};
And then using it like this:
var num = 1234;
alert(num.format());
Credit: Jeffrey Friedl, Mastering Regular Expressions, 3rd. edition, p. 66-67
Formatting a number can be handled elegantly with one line of code.
This code extends the Number object; usage examples are included below.
Code:
Number.prototype.format = function () {
return this.toString().split( /(?=(?:\d{3})+(?:\.|$))/g ).join( "," );
};
How it works
The regular expression uses a look-ahead to find positions within the string where the only thing to the right of it is one or more groupings of three numbers, until either a decimal or the end of string is encountered. The .split() is used to break the string at those points into array elements, and then the .join() merges those elements back into a string, separated by commas.
The concept of finding positions within the string, rather than matching actual characters, is important in order to split the string without removing any characters.
Usage examples:
var n = 9817236578964235;
alert( n.format() ); // Displays "9,817,236,578,964,235"
n = 87345.87;
alert( n.format() ); // Displays "87,345.87"
Of course, the code can easily be extended or changed to handle locale considerations. For example, here is a new version of the code that automatically detects the locale settings and swaps the use of commas and periods.
Locale-aware version:
Number.prototype.format = function () {
if ((1.1).toLocaleString().indexOf(".") >= 0) {
return this.toString().split( /(?=(?:\d{3})+(?:\.|$))/g ).join( "," );
}
else {
return this.toString().split( /(?=(?:\d{3})+(?:,|$))/g ).join( "." );
}
};
Unless it's really necessary, I prefer the simplicity of the first version though.
With the caveat that Intl.NumberFormat and Number.toLocaleString() are now there for this purpose in JavaScript:
The other answers using regular expressions all break down for decimal numbers (although the authors seem to not know this because they have only tested with 1 or 2 decimal places). This is because without lookbehind, JS regular expressions have no way to know whether you are working with the block of digits before or after the decimal point. That leaves two ways to address this with JS regular expressions:
Know whether there is a decimal point in the number, and use different regular expressions depending on that:
/(\d)(?=(\d{3})+$)/g for integers
/(\d)(?=(\d{3})+\.)/g for decimals
Use two regular expressions, one to match the decimal portion, and a second to do a replace on it.
function format(num) {
return num.toString().replace(/^[+-]?\d+/, function(int) {
return int.replace(/(\d)(?=(\d{3})+$)/g, '$1,');
});
}
console.log(format(332432432))
console.log(format(332432432.3432432))
console.log(format(-332432432))
console.log(format(1E6))
console.log(format(1E-6))
function numberWithCommas(x) {
return x.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");
}
var num=numberWithCommas(2000000); //any number
console.log(num);
enter code here
Try this
// You might want to take decimals into account
Number.prototype.commas= function(){
var s= '', temp,
num= this.toString().split('.'), n=num[0];
while(n.length> 3){
temp= n.substring(n.length-3);
s= ','+temp+s;
n= n.slice(0, -3);
}
if(n) s= n+s;
if(num[1]) s+='.'+num[1];
return s;
}
var n= 10000000000.34;
n.commas() = returned value: (String) 10,000,000,000.34
underscore.string has a nice implementation.
I've amended it slightly to accept numeric strings.
function numberFormat(number, dec, dsep, tsep) {
if (isNaN(number) || number == null) return '';
number = parseFloat(number).toFixed(~~dec);
tsep = typeof tsep == 'string' ? tsep : ',';
var parts = number.split('.'),
fnums = parts[0],
decimals = parts[1] ? (dsep || '.') + parts[1] : '';
return fnums.replace(/(\d)(?=(?:\d{3})+$)/g, '$1' + tsep) + decimals;
}
console.log(numberFormat(123456789))
console.log(numberFormat(123456789.123456789))
console.log(numberFormat(-123456789))
console.log(numberFormat(1E6))
console.log(numberFormat(1E-6))
console.log('---')
console.log(numberFormat(123456789, 6, ',', '_'))
console.log(numberFormat(123456789.123456789, 6, ',', '_'))
console.log(numberFormat(-123456789, 6, ',', '_'))
console.log(numberFormat(1E6, 6, ',', '_'))
console.log(numberFormat(1E-6, 6, ',', '_'))
One RegExp for integers and decimals:
// Formats number 1234.5678 into string "1 234.5678".
function formatNumber(number: number): string {
return number.toString().replace(/(?<!(\.\d*|^.{0}))(?=(\d{3})+(?!\d))/g, ' ');
}
console.log(formatNumber(1234.5678)); // "1 234.5678"
console.log(formatNumber(123)); // "123"
console.log(formatNumber(123.45678)); // "123.45678"
console.log(formatNumber(123456789.11111111)); // "123 456 789.1111111"
Try something like this:
function add_commas(numStr)
{
numStr += '';
var x = numStr.split('.');
var x1 = x[0];
var x2 = x.length > 1 ? '.' + x[1] : '';
var rgx = /(\d+)(\d{3})/;
while (rgx.test(x1)) {
x1 = x1.replace(rgx, '$1' + ',' + '$2');
}
return x1 + x2;
}
If you really want a regex, you can use two in a while loop:
while(num.match(/\d{4}/)) {
num = num.replace(/(\d{3})(,\d|$)/, ',$1$2');
}
And if you want to be fancy, you can format numbers with decimal points too:
while(num.match(/\d{4}(\,|\.)/)) {
num = num.replace(/(\d{3})(,\d|$|\.)/, ',$1$2');
}
Edit:
You can also do this with 2 regular expressions and no loop, splits, joins, etc:
num = num.replace(/(\d{1,2}?)((\d{3})+)$/, "$1,$2");
num = num.replace(/(\d{3})(?=\d)/g, "$1,");
The first regex puts a comma after the first 1 or 2 digits if the remaining number of digits is divisible by three. The second regex places a comma after every remaining group of 3 digits.
These won't work with decimals, but they work great for positive and negative integers.
Test output:
45
3,856
398,868,483,992
635
12,358,717,859,918,856
-1,388,488,184
Someone mentioned that lookbehind isn't possible in Javascript RegExp. Here is a great page that explains how to use lookaround (lookahead and lookbehind).
http://www.regular-expressions.info/lookaround.html
I think you would necessarily have to do multiple passes to achieve this with regular expressions. Try the following:
Run a regex for one digit followed by 3 digits.
If that regex matches, replace it with the first digit, then a comma, then the next 3 digits.
Repeat until (1) finds no matches.
Iteration isn't necessary
function formatNumber(n, separator) {
separator = separator || ",";
n = n.toString()
.split("").reverse().join("")
.replace(/(\d{3})/g, "$1" + separator)
.split("").reverse().join("");
// Strings that have a length that is a multiple of 3 will have a leading separator
return n[0] == separator ? n.substr(1) : n;
}
var testCases = [1, 45, 2856, 398868483992];
for ( var i in testCases ) {
if ( !ns.hasOwnProperty(i) ) { continue; }
console.info(testCases[i]);
console.log(formatNumber(testCases[i]));
}
Results
1
1
45
45
2856
2,856
398868483992
398,868,483,992
First reverse a character array, then add commas after every third number unless it's just before the end of the string or before a - sign. Then reverse the character array again and make it a string again.
function add_commas(numStr){
return numStr.split('').reverse().join('').replace(/(\d{3})(?=[^$|^-])/g, "$1,").split('').reverse().join('');
}
Brandon,
I didn't see too many answers working the regex from the decimal point back, so I thought I might chime in.
I wondered if there is any elegant benefit to re-writing the regexp to scan from the back forward...
function addCommas(inputText) {
// pattern works from right to left
var commaPattern = /(\d+)(\d{3})(\.\d*)*$/;
var callback = function (match, p1, p2, p3) {
return p1.replace(commaPattern, callback) + ',' + p2 + (p3 || '');
};
return inputText.replace(commaPattern, callback);
}
>> Fiddle Demo <<
This accounts for any decimal place.
After so much searching, I generate a regex which accepts all formats
(\d+[-, ,(]{0,3}\d+[-, ,(,)]{0,3}\d+[-, ,(,)]{0,3}\d+[)]{0,2})

Categories