Match exact word with new lines anywhere using JavaScript

Match exact word with new lines anywhere using JavaScript - javascript

How can I match following patterns using JavaScript programming language?
sample (not sample in sampleword)
sam\nple
sa\nmple
I used \b for boundaries for full word match.
I'm trying to find an efficient way to do newline match anywhere in the word.
For case 2: \bsam[\s\S]ple\b works.
The same can be adapted for case3 as well to match.
But, Is there a way to have single pattern match all of those ?

It looks like you're asking how to match a string with a single optional space anywhere in the string? If so, that looks like this:
function getRegEx(word) {
var patterns = word.split('').map(function (letter, index) {
return (
word.slice(0, index) +
'\\s?' +
word.slice(index)
)
})
patterns.shift()
return new RegExp('^' + patterns.join('|') + '$');
}
getRegEx('pattern').test('pat tern') // --> true
However, if there can be multiple spaces in the string (as in 's amp le'), then it would be as follows:
function getRegEx(word) {
word = word.split('')
word = word.map(function (letter) {
return letter + '\\s?'
})
return new RegExp('^' + word.join('') + '$')
}
getRegEx('pattern').test('pat tern') // --> true
Per OP's request (see comments):
If you want to look for an indefinite number of space characters, pass true to the following function (as the second param). Otherwise, it'll just find one:
function getRegEx(word, mult) {
var spaceCount = mult ? '*' : '?'
word = word.split('')
word = word.map(function (letter) {
return letter + '\\s' + spaceCount
})
return new RegExp('^' + word.join('') + '$')
}
getRegEx('pattern', true).test('pat tern') // --> true
If you wish to specify where in the pattern spaces may appear, that can be done as follows:
function getRegEx(word, mult) {
word = word.split('')
var spaceCount = mult ? '*' : '?'
var positions = []
word.forEach(function (char, index) {
if (char === ' ') positions.push(index)
})
positions.forEach(function (pos) {
word.splice(pos, 1, '\\s' + spaceCount)
})
return new RegExp('^' + word.join('') + '$')
}
Put a space in your pattern string wherever you want there to be spaces matched. As in:
getRegEx('p at tern', true).test('p at tern') // --> true

You can remove \n using replace and compare that to sample using word boundaries:
/\bsample\b/i.test('sa\nmple'.replace(/\n/g, ''))
true
/\bsample\b/i.test('sam\nple'.replace(/\n/g, ''))
true
/\bsample\b/i.test('sample'.replace(/\n/g, ''))
true
/\bsample\b/i.test('sample'.replace(/\n/g, ''))
true
/\bsample\b/i.test('some sam\nple'.replace(/\n/g, ''))
true
\bsample\b/i.test('sam\nples'.replace(/\n/g, ''))
false

Related

Capitalize first letter of each word in JS

I'm learning how to capitalize the first letter of each word in a string and for this solution I understand everything except the word.substr(1) portion. I see that it's adding the broken string but how does the (1) work?
function toUpper(str) {
return str
.toLowerCase()
.split(' ')
.map(function(word) {
return word[0].toUpperCase() + word.substr(1);
})
.join(' ');
}
console.log(toUpper("hello friend"))

The return value contain 2 parts:
return word[0].toUpperCase() + word.substr(1);
1) word[0].toUpperCase(): It's the first capital letter
2) word.substr(1) the whole remain word except the first letter which has been capitalized. This is document for how substr works.
Refer below result if you want to debug:
function toUpper(str) {
return str
.toLowerCase()
.split(' ')
.map(function(word) {
console.log("First capital letter: "+word[0]);
console.log("remain letters: "+ word.substr(1));
return word[0].toUpperCase() + word.substr(1);
})
.join(' ');
}
console.log(toUpper("hello friend"))

Or you could save a lot of time and use Lodash
Look at
https://lodash.com/docs/4.17.4#startCase -added/edited-
https://lodash.com/docs/4.17.4#capitalize
Ex.
-added/edited-
You may what to use startCase, another function for capitalizing first letter of each word.
_.startCase('foo bar');
// => 'Foo Bar'
and capitalize for only the first letter on the sentence
_.capitalize('FRED');
// => 'Fred'
Lodash is a beautiful js library made to save you a lot of time.
There you will find a lot of time saver functions for strings, numbers, arrays, collections, etc.
Also you can use it on client or server (nodejs) side, use bower or node, cdn or include it manually.

Here is a quick code snippet. This code snippet will allow you to capitalize the first letter of a string using JavaScript.
function capitlizeText(word)
{
return word.charAt(0).toUpperCase() + word.slice(1);
}

The regexp /\b\w/ matches a word boundary followed by a word character. You can use this with the replace() string method to match then replace such characters (without the g (global) regexp flag only the first matching char is replaced):
> 'hello my name is ...'.replace(/\b\w/, (c) => c.toUpperCase());
'Hello my name is ...'
> 'hello my name is ...'.replace(/\b\w/g, (c) => c.toUpperCase());
'Hello My Name Is ...'

function titleCase(str) {
return str.toLowerCase().split(' ').map(x=>x[0].toUpperCase()+x.slice(1)).join(' ');
}
titleCase("I'm a little tea pot");
titleCase("sHoRt AnD sToUt");

The major part of the answers explains to you how works the substr(1). I give to you a better aproach to resolve your problem
function capitalizeFirstLetters(str){
return str.toLowerCase().replace(/^\w|\s\w/g, function (letter) {
return letter.toUpperCase();
})
}
Explanation:
- First convert the entire string to lower case
- Second check the first letter of the entire string and check the first letter that have a space character before and replaces it applying .toUpperCase() method.
Check this example:
function capitalizeFirstLetters(str){
return str.toLowerCase().replace(/^\w|\s\w/g, function (letter) {
return letter.toUpperCase();
})
}
console.log(capitalizeFirstLetters("a lOt of words separated even much spaces "))

Consider an arrow function with an implicit return:
word => `${word.charAt(0).toUpperCase()}${word.slice(1).toLowerCase()}`
This will do it in one line.

Using ES6
let captalizeWord = text => text.toLowerCase().split(' ').map( (i, j) => i.charAt(0).toUpperCase()+i.slice(1)).join(' ')
captalizeWord('cool and cool')

substr is a function that returns (from the linked MDN) a new string containing the extracted section of the given string (starting from the second character in your function). There is a comment on the polyfill implementation as well, which adds Get the substring of a string.

function titlecase(str){
let titlecasesentence = str.split(' ');
titlecasesentence = titlecasesentence.map((word)=>{
const firstletter = word.charAt(0).toUpperCase();
word = firstletter.concat(word.slice(1,word.length));
return word;
});
titlecasesentence = titlecasesentence.join(' ');
return titlecasesentence;
}
titlecase('this is how to capitalize the first letter of a word');

const capitalize = str => {
if (typeof str !== 'string') {
throw new Error('Invalid input: input must of type "string"');
}
return str
.trim()
.replace(/ {1,}/g, ' ')
.toLowerCase()
.split(' ')
.map(word => word[0].toUpperCase() + word.slice(1))
.join(' ');
};
sanitize the input string with trim() to remove whitespace from the leading and trailing ends
replace any extra spaces in the middle with a RegExp
normalize and convert it all toLowerCase() letters
convert the string to an array split on spaces
map that array into an array of capitalized words
join(' ') the array with spaces and return the newly capitalized string

Whole sentence will be capitalize only by one line
"my name is John".split(/ /g).map(val => val[0].toUpperCase() + val.slice(1)).join(' ')
Output "My Name Is John"

A nice simple solution, using pure JavaScript. JSFiddle
function initCap(s) {
var result = '';
if ((typeof (s) === 'undefined') || (s == null)) {
return result;
}
s = s.toLowerCase();
var words = s.split(' ');
for (var i = 0; i < words.length; ++i) {
result += (i > 0 ? ' ' : '') +
words[i].substring(0, 1).toUpperCase() +
words[i].substring(1);
}
return result;
}

Here is an example of how substr works: When you pass in a number, it takes a portion of the string based on the index you provided:
console.log('Testing string'.substr(0)); // Nothing different
console.log('Testing string'.substr(1)); // Starts from index 1 (position 2)
console.log('Testing string'.substr(2));
So, they are taking the first letter of each word, capitalizing it, and then adding on the remaining of the word. Ance since you are only capitalizing the first letter, the index to start from is always 1.

In word.substr(i), the param means the index of the word. This method cuts the word from the letter whose index equals i to the end of the word.
You can also add another param like word.substr(i, len), where len means the length of the character segmentation. For example:
'abcde'.substr(1, 2) → bc.

function toTitleCase(str)
{
return str.replace(/\w\S*/g, function(txt){return
txt.charAt(0).toUpperCase() + txt.substr(1).toLowerCase();});
}

Just map through if an array set the first letter as uppercase and concatenate with other letters from index 1.
The array isn't your case here.
const capitalizeNames = (arr) => {
arr.map((name) => {
let upper = name[0].toUpperCase() + name.substr(1)
console.log(upper)
})
}

Here's another clean way of Capitalizing sentences/names/... :
const capitalizeNames =(name)=>{
const names = name.split(' ') // ['kouhadi','aboubakr',essaaddik']
const newCapName = [] // declaring an empty array
for (const n of names){
newCapName.push(n.replace(n[0], n[0].toUpperCase()));
}
return newCapName.join(' ')
}
capitalizeNames('kouhadi aboubakr essaaddik'); // 'Kouhadi Aboubakr Essaaddik'

You could use these lines of code:
function toUpper(str) {
return [str.split('')[0].toUpperCase(), str.split('').slice(1, str.split('').length).join("")].join("")
}
Basically it will split all characters, slice it, create a new array without the first entry/character and replace the first entry/character with an uppercase verion of the character.
(Yes, this was tested and it works on Edge, Chrome and newer versions of Internet Explorer.)
This is probably not the greatest answer, but hopefully it works well enough for you.

How to separate the values of a line of .csv file which contains commas in data? [duplicate]

I have the following type of string
var string = "'string, duppi, du', 23, lala"
I want to split the string into an array on each comma, but only the commas outside the single quotation marks.
I can't figure out the right regular expression for the split...
string.split(/,/)
will give me
["'string", " duppi", " du'", " 23", " lala"]
but the result should be:
["string, duppi, du", "23", "lala"]
Is there a cross-browser solution?

Disclaimer
2014-12-01 Update: The answer below works only for one very specific format of CSV. As correctly pointed out by DG in the comments, this solution does NOT fit the RFC 4180 definition of CSV and it also does NOT fit MS Excel format. This solution simply demonstrates how one can parse one (non-standard) CSV line of input which contains a mix of string types, where the strings may contain escaped quotes and commas.
A non-standard CSV solution
As austincheney correctly points out, you really need to parse the string from start to finish if you wish to properly handle quoted strings that may contain escaped characters. Also, the OP does not clearly define what a "CSV string" really is. First we must define what constitutes a valid CSV string and its individual values.
Given: "CSV String" Definition
For the purpose of this discussion, a "CSV string" consists of zero or more values, where multiple values are separated by a comma. Each value may consist of:
A double quoted string. (may contain unescaped single quotes.)
A single quoted string. (may contain unescaped double quotes.)
A non-quoted string. (may NOT contain quotes, commas or backslashes.)
An empty value. (An all whitespace value is considered empty.)
Rules/Notes:
Quoted values may contain commas.
Quoted values may contain escaped-anything, e.g. 'that\'s cool'.
Values containing quotes, commas, or backslashes must be quoted.
Values containing leading or trailing whitespace must be quoted.
The backslash is removed from all: \' in single quoted values.
The backslash is removed from all: \" in double quoted values.
Non-quoted strings are trimmed of any leading and trailing spaces.
The comma separator may have adjacent whitespace (which is ignored).
Find:
A JavaScript function which converts a valid CSV string (as defined above) into an array of string values.
Solution:
The regular expressions used by this solution are complex. And (IMHO) all non-trivial regexes should be presented in free-spacing mode with lots of comments and indentation. Unfortunately, JavaScript does not allow free-spacing mode. Thus, the regular expressions implemented by this solution are first presented in native regex syntax (expressed using Python's handy: r'''...''' raw-multi-line-string syntax).
First here is a regular expression which validates that a CVS string meets the above requirements:
Regex to validate a "CSV string":
re_valid = r"""
# Validate a CSV string having single, double or un-quoted values.
^ # Anchor to start of string.
\s* # Allow whitespace before value.
(?: # Group for value alternatives.
'[^'\\]*(?:\\[\S\s][^'\\]*)*' # Either Single quoted string,
| "[^"\\]*(?:\\[\S\s][^"\\]*)*" # or Double quoted string,
| [^,'"\s\\]*(?:\s+[^,'"\s\\]+)* # or Non-comma, non-quote stuff.
) # End group of value alternatives.
\s* # Allow whitespace after value.
(?: # Zero or more additional values
, # Values separated by a comma.
\s* # Allow whitespace before value.
(?: # Group for value alternatives.
'[^'\\]*(?:\\[\S\s][^'\\]*)*' # Either Single quoted string,
| "[^"\\]*(?:\\[\S\s][^"\\]*)*" # or Double quoted string,
| [^,'"\s\\]*(?:\s+[^,'"\s\\]+)* # or Non-comma, non-quote stuff.
) # End group of value alternatives.
\s* # Allow whitespace after value.
)* # Zero or more additional values
$ # Anchor to end of string.
"""
If a string matches the above regex, then that string is a valid CSV string (according to the rules previously stated) and may be parsed using the following regex. The following regex is then used to match one value from the CSV string. It is applied repeatedly until no more matches are found (and all values have been parsed).
Regex to parse one value from valid CSV string:
re_value = r"""
# Match one value in valid CSV string.
(?!\s*$) # Don't match empty last value.
\s* # Strip whitespace before value.
(?: # Group for value alternatives.
'([^'\\]*(?:\\[\S\s][^'\\]*)*)' # Either $1: Single quoted string,
| "([^"\\]*(?:\\[\S\s][^"\\]*)*)" # or $2: Double quoted string,
| ([^,'"\s\\]*(?:\s+[^,'"\s\\]+)*) # or $3: Non-comma, non-quote stuff.
) # End group of value alternatives.
\s* # Strip whitespace after value.
(?:,|$) # Field ends on comma or EOS.
"""
Note that there is one special case value that this regex does not match - the very last value when that value is empty. This special "empty last value" case is tested for and handled by the js function which follows.
JavaScript function to parse CSV string:
// Return array of string values, or NULL if CSV string not well formed.
function CSVtoArray(text) {
var re_valid = /^\s*(?:'[^'\\]*(?:\\[\S\s][^'\\]*)*'|"[^"\\]*(?:\\[\S\s][^"\\]*)*"|[^,'"\s\\]*(?:\s+[^,'"\s\\]+)*)\s*(?:,\s*(?:'[^'\\]*(?:\\[\S\s][^'\\]*)*'|"[^"\\]*(?:\\[\S\s][^"\\]*)*"|[^,'"\s\\]*(?:\s+[^,'"\s\\]+)*)\s*)*$/;
var re_value = /(?!\s*$)\s*(?:'([^'\\]*(?:\\[\S\s][^'\\]*)*)'|"([^"\\]*(?:\\[\S\s][^"\\]*)*)"|([^,'"\s\\]*(?:\s+[^,'"\s\\]+)*))\s*(?:,|$)/g;
// Return NULL if input string is not well formed CSV string.
if (!re_valid.test(text)) return null;
var a = []; // Initialize array to receive values.
text.replace(re_value, // "Walk" the string using replace with callback.
function(m0, m1, m2, m3) {
// Remove backslash from \' in single quoted values.
if (m1 !== undefined) a.push(m1.replace(/\\'/g, "'"));
// Remove backslash from \" in double quoted values.
else if (m2 !== undefined) a.push(m2.replace(/\\"/g, '"'));
else if (m3 !== undefined) a.push(m3);
return ''; // Return empty string.
});
// Handle special case of empty last value.
if (/,\s*$/.test(text)) a.push('');
return a;
};
Example input and output:
In the following examples, curly braces are used to delimit the {result strings}. (This is to help visualize leading/trailing spaces and zero-length strings.)
// Test 1: Test string from original question.
var test = "'string, duppi, du', 23, lala";
var a = CSVtoArray(test);
/* Array hes 3 elements:
a[0] = {string, duppi, du}
a[1] = {23}
a[2] = {lala} */
// Test 2: Empty CSV string.
var test = "";
var a = CSVtoArray(test);
/* Array hes 0 elements: */
// Test 3: CSV string with two empty values.
var test = ",";
var a = CSVtoArray(test);
/* Array hes 2 elements:
a[0] = {}
a[1] = {} */
// Test 4: Double quoted CSV string having single quoted values.
var test = "'one','two with escaped \' single quote', 'three, with, commas'";
var a = CSVtoArray(test);
/* Array hes 3 elements:
a[0] = {one}
a[1] = {two with escaped ' single quote}
a[2] = {three, with, commas} */
// Test 5: Single quoted CSV string having double quoted values.
var test = '"one","two with escaped \" double quote", "three, with, commas"';
var a = CSVtoArray(test);
/* Array hes 3 elements:
a[0] = {one}
a[1] = {two with escaped " double quote}
a[2] = {three, with, commas} */
// Test 6: CSV string with whitespace in and around empty and non-empty values.
var test = " one , 'two' , , ' four' ,, 'six ', ' seven ' , ";
var a = CSVtoArray(test);
/* Array hes 8 elements:
a[0] = {one}
a[1] = {two}
a[2] = {}
a[3] = { four}
a[4] = {}
a[5] = {six }
a[6] = { seven }
a[7] = {} */
Additional notes:
This solution requires that the CSV string be "valid". For example, unquoted values may not contain backslashes or quotes, e.g. the following CSV string is NOT valid:
var invalid1 = "one, that's me!, escaped \, comma"
This is not really a limitation because any sub-string may be represented as either a single or double quoted value. Note also that this solution represents only one possible definition for: "Comma Separated Values".
Edit: 2014-05-19: Added disclaimer.
Edit: 2014-12-01: Moved disclaimer to top.

RFC 4180 solution
This does not solve the string in the question since its format is not conforming with RFC 4180; the acceptable encoding is escaping double quote with double quote. The solution below works correctly with CSV files d/l from google spreadsheets.
UPDATE (3/2017)
Parsing single line would be wrong. According to RFC 4180 fields may contain CRLF which will cause any line reader to break the CSV file. Here is an updated version that parses CSV string:
'use strict';
function csvToArray(text) {
let p = '', row = [''], ret = [row], i = 0, r = 0, s = !0, l;
for (l of text) {
if ('"' === l) {
if (s && l === p) row[i] += l;
s = !s;
} else if (',' === l && s) l = row[++i] = '';
else if ('\n' === l && s) {
if ('\r' === p) row[i] = row[i].slice(0, -1);
row = ret[++r] = [l = '']; i = 0;
} else row[i] += l;
p = l;
}
return ret;
};
let test = '"one","two with escaped """" double quotes""","three, with, commas",four with no quotes,"five with CRLF\r\n"\r\n"2nd line one","two with escaped """" double quotes""","three, with, commas",four with no quotes,"five with CRLF\r\n"';
console.log(csvToArray(test));
OLD ANSWER
(Single line solution)
function CSVtoArray(text) {
let ret = [''], i = 0, p = '', s = true;
for (let l in text) {
l = text[l];
if ('"' === l) {
s = !s;
if ('"' === p) {
ret[i] += '"';
l = '-';
} else if ('' === p)
l = '-';
} else if (s && ',' === l)
l = ret[++i] = '';
else
ret[i] += l;
p = l;
}
return ret;
}
let test = '"one","two with escaped """" double quotes""","three, with, commas",four with no quotes,five for fun';
console.log(CSVtoArray(test));
And for the fun, here is how you create CSV from the array:
function arrayToCSV(row) {
for (let i in row) {
row[i] = row[i].replace(/"/g, '""');
}
return '"' + row.join('","') + '"';
}
let row = [
"one",
"two with escaped \" double quote",
"three, with, commas",
"four with no quotes (now has)",
"five for fun"
];
let text = arrayToCSV(row);
console.log(text);

I liked FakeRainBrigand's answer, however it contains a few problems: It can not handle whitespace between a quote and a comma, and does not support 2 consecutive commas. I tried editing his answer but my edit got rejected by reviewers that apparently did not understand my code. Here is my version of FakeRainBrigand's code.
There is also a fiddle: http://jsfiddle.net/xTezm/46/
String.prototype.splitCSV = function() {
var matches = this.match(/(\s*"[^"]+"\s*|\s*[^,]+|,)(?=,|$)/g);
for (var n = 0; n < matches.length; ++n) {
matches[n] = matches[n].trim();
if (matches[n] == ',') matches[n] = '';
}
if (this[0] == ',') matches.unshift("");
return matches;
}
var string = ',"string, duppi, du" , 23 ,,, "string, duppi, du",dup,"", , lala';
var parsed = string.splitCSV();
alert(parsed.join('|'));

I had a very specific use case where I wanted to copy cells from Google Sheets into my web app. Cells could include double-quotes and new-line characters. Using copy and paste, the cells are delimited by a tab characters, and cells with odd data are double quoted. I tried this main solution, the linked article using regexp, and Jquery-CSV, and CSVToArray. http://papaparse.com/ Is the only one that worked out of the box. Copy and paste is seamless with Google Sheets with default auto-detect options.

PEG(.js) grammar that handles RFC 4180 examples at http://en.wikipedia.org/wiki/Comma-separated_values:
start
= [\n\r]* first:line rest:([\n\r]+ data:line { return data; })* [\n\r]* { rest.unshift(first); return rest; }
line
= first:field rest:("," text:field { return text; })*
& { return !!first || rest.length; } // ignore blank lines
{ rest.unshift(first); return rest; }
field
= '"' text:char* '"' { return text.join(''); }
/ text:[^\n\r,]* { return text.join(''); }
char
= '"' '"' { return '"'; }
/ [^"]
Test at http://jsfiddle.net/knvzk/10 or https://pegjs.org/online.
Download the generated parser at https://gist.github.com/3362830.

People seemed to be against RegEx for this. Why?
(\s*'[^']+'|\s*[^,]+)(?=,|$)
Here's the code. I also made a fiddle.
String.prototype.splitCSV = function(sep) {
var regex = /(\s*'[^']+'|\s*[^,]+)(?=,|$)/g;
return matches = this.match(regex);
}
var string = "'string, duppi, du', 23, 'string, duppi, du', lala";
console.log( string.splitCSV() );
.as-console-wrapper { max-height: 100% !important; top: 0; }

Adding one more to the list, because I find all of the above not quite "KISS" enough.
This one uses regex to find either commas or newlines while skipping over quoted items. Hopefully this is something noobies can read through on their own. The splitFinder regexp has three things it does (split by a |):
, - finds commas
\r?\n - finds new lines, (potentially with carriage return if the exporter was nice)
"(\\"|[^"])*?" - skips anynthing surrounded in quotes, because commas and newlines don't matter in there. If there is an escaped quote \\" in the quoted item, it will get captured before an end quote can be found.
const splitFinder = /,|\r?\n|"(\\"|[^"])*?"/g;
function csvTo2dArray(parseMe) {
let currentRow = [];
const rowsOut = [currentRow];
let lastIndex = splitFinder.lastIndex = 0;
// add text from lastIndex to before a found newline or comma
const pushCell = (endIndex) => {
endIndex = endIndex || parseMe.length;
const addMe = parseMe.substring(lastIndex, endIndex);
// remove quotes around the item
currentRow.push(addMe.replace(/^"|"$/g, ""));
lastIndex = splitFinder.lastIndex;
}
let regexResp;
// for each regexp match (either comma, newline, or quoted item)
while (regexResp = splitFinder.exec(parseMe)) {
const split = regexResp[0];
// if it's not a quote capture, add an item to the current row
// (quote captures will be pushed by the newline or comma following)
if (split.startsWith(`"`) === false) {
const splitStartIndex = splitFinder.lastIndex - split.length;
pushCell(splitStartIndex);
// then start a new row if newline
const isNewLine = /^\r?\n$/.test(split);
if (isNewLine) { rowsOut.push(currentRow = []); }
}
}
// make sure to add the trailing text (no commas or newlines after)
pushCell();
return rowsOut;
}
const rawCsv = `a,b,c\n"test\r\n","comma, test","\r\n",",",\nsecond,row,ends,with,empty\n"quote\"test"`
const rows = csvTo2dArray(rawCsv);
console.log(rows);

No regexp, readable, and according to https://en.wikipedia.org/wiki/Comma-separated_values#Basic_rules:
function csv2arr(str: string) {
let line = ["",];
const ret = [line,];
let quote = false;
for (let i = 0; i < str.length; i++) {
const cur = str[i];
const next = str[i + 1];
if (!quote) {
const cellIsEmpty = line[line.length - 1].length === 0;
if (cur === '"' && cellIsEmpty) quote = true;
else if (cur === ",") line.push("");
else if (cur === "\r" && next === "\n") { line = ["",]; ret.push(line); i++; }
else if (cur === "\n" || cur === "\r") { line = ["",]; ret.push(line); }
else line[line.length - 1] += cur;
} else {
if (cur === '"' && next === '"') { line[line.length - 1] += cur; i++; }
else if (cur === '"') quote = false;
else line[line.length - 1] += cur;
}
}
return ret;
}

If you can have your quote delimiter be double quotes, then this is a duplicate of Example JavaScript code to parse CSV data.
You can either translate all single-quotes to double-quotes first:
string = string.replace( /'/g, '"' );
...or you can edit the regex in that question to recognize single-quotes instead of double-quotes:
// Quoted fields.
"(?:'([^']*(?:''[^']*)*)'|" +
However, this assumes certain markup that is not clear from your question. Please clarify what all the various possibilities of markup can be, per my comment on your question.

I've used regex a number of times, but I always have to relearn it each time, which is frustrating :-)
So Here's a non-regex solution:
function csvRowToArray(row, delimiter = ',', quoteChar = '"'){
let nStart = 0, nEnd = 0, a=[], nRowLen=row.length, bQuotedValue;
while (nStart <= nRowLen) {
bQuotedValue = (row.charAt(nStart) === quoteChar);
if (bQuotedValue) {
nStart++;
nEnd = row.indexOf(quoteChar + delimiter, nStart)
} else {
nEnd = row.indexOf(delimiter, nStart)
}
if (nEnd < 0) nEnd = nRowLen;
a.push(row.substring(nStart,nEnd));
nStart = nEnd + delimiter.length + (bQuotedValue ? 1 : 0)
}
return a;
}
How it works:
Pass in the csv string in row.
While the start position of the next value is within the row, do the following:
If this value has been quoted, set nEnd to the closing quote.
Else if value has NOT been quoted, set nEnd to the next delimiter.
Add the value to an array.
Set nStart to nEnd plus the length of the delimeter.
Sometimes it's good to write your own small function, rather than use a library. Your own code is going to perform well and use only a small footprint. In addition, you can easily tweak it to suit your own needs.

Regular expressions to the rescue! These few lines of code properly handle quoted fields with embedded commas, quotes, and newlines based on the RFC 4180 standard.
function parseCsv(data, fieldSep, newLine) {
fieldSep = fieldSep || ',';
newLine = newLine || '\n';
var nSep = '\x1D';
var qSep = '\x1E';
var cSep = '\x1F';
var nSepRe = new RegExp(nSep, 'g');
var qSepRe = new RegExp(qSep, 'g');
var cSepRe = new RegExp(cSep, 'g');
var fieldRe = new RegExp('(?<=(^|[' + fieldSep + '\\n]))"(|[\\s\\S]+?(?<![^"]"))"(?=($|[' + fieldSep + '\\n]))', 'g');
var grid = [];
data.replace(/\r/g, '').replace(/\n+$/, '').replace(fieldRe, function(match, p1, p2) {
return p2.replace(/\n/g, nSep).replace(/""/g, qSep).replace(/,/g, cSep);
}).split(/\n/).forEach(function(line) {
var row = line.split(fieldSep).map(function(cell) {
return cell.replace(nSepRe, newLine).replace(qSepRe, '"').replace(cSepRe, ',');
});
grid.push(row);
});
return grid;
}
const csv = 'A1,B1,C1\n"A ""2""","B, 2","C\n2"';
const separator = ','; // field separator, default: ','
const newline = ' <br /> '; // newline representation in case a field contains newlines, default: '\n'
var grid = parseCsv(csv, separator, newline);
// expected: [ [ 'A1', 'B1', 'C1' ], [ 'A "2"', 'B, 2', 'C <br /> 2' ] ]
Unless stated elsewhere, you don't need a finite state machine. The regular expression handles RFC 4180 properly thanks to positive lookbehind, negative lookbehind, and positive lookahead.
Clone/download code at https://github.com/peterthoeny/parse-csv-js

I have also faced the same type of problem when I had to parse a CSV file.
The file contains a column address which contains the ',' .
After parsing that CSV file to JSON, I get mismatched mapping of the keys while converting it into a JSON file.
I used Node.js for parsing the file and libraries like baby parse and csvtojson.
Example of file -
address,pincode
foo,baar , 123456
While I was parsing directly without using baby parse in JSON, I was getting:
[{
address: 'foo',
pincode: 'baar',
'field3': '123456'
}]
So I wrote code which removes the comma(,) with any other delimiter
with every field:
/*
csvString(input) = "address, pincode\\nfoo, bar, 123456\\n"
output = "address, pincode\\nfoo {YOUR DELIMITER} bar, 123455\\n"
*/
const removeComma = function(csvString){
let delimiter = '|'
let Baby = require('babyparse')
let arrRow = Baby.parse(csvString).data;
/*
arrRow = [
[ 'address', 'pincode' ],
[ 'foo, bar', '123456']
]
*/
return arrRow.map((singleRow, index) => {
//the data will include
/*
singleRow = [ 'address', 'pincode' ]
*/
return singleRow.map(singleField => {
//for removing the comma in the feild
return singleField.split(',').join(delimiter)
})
}).reduce((acc, value, key) => {
acc = acc +(Array.isArray(value) ?
value.reduce((acc1, val)=> {
acc1 = acc1+ val + ','
return acc1
}, '') : '') + '\n';
return acc;
},'')
}
The function returned can be passed into the csvtojson library and thus the result can be used.
const csv = require('csvtojson')
let csvString = "address, pincode\\nfoo, bar, 123456\\n"
let jsonArray = []
modifiedCsvString = removeComma(csvString)
csv()
.fromString(modifiedCsvString)
.on('json', json => jsonArray.push(json))
.on('end', () => {
/* do any thing with the json Array */
})
Now you can get the output like:
[{
address: 'foo, bar',
pincode: 123456
}]

My answer presumes your input is a reflection of code/content from web sources where single and double quote characters are fully interchangeable provided they occur as an non-escaped matching set.
You cannot use regex for this. You actually have to write a micro parser to analyze the string you wish to split. I will, for the sake of this answer, call the quoted parts of your strings as sub-strings. You need to specifically walk across the string. Consider the following case:
var a = "some sample string with \"double quotes\" and 'single quotes' and some craziness like this: \\\" or \\'",
b = "sample of code from JavaScript with a regex containing a comma /\,/ that should probably be ignored.";
In this case you have absolutely no idea where a sub-string starts or ends by simply analyzing the input for a character pattern. Instead you have to write logic to make decisions on whether a quote character is used a quote character, is itself unquoted, and that the quote character is not following an escape.
I am not going to write that level of complexity of code for you, but you can look at something I recently wrote that has the pattern you need. This code has nothing to do with commas, but is otherwise a valid enough micro-parser for you to follow in writing your own code. Look into the asifix function of the following application:
https://github.com/austincheney/Pretty-Diff/blob/master/fulljsmin.js

To complement this answer
If you need to parse quotes escaped with another quote, example:
"some ""value"" that is on xlsx file",123
You can use
function parse(text) {
const csvExp = /(?!\s*$)\s*(?:'([^'\\]*(?:\\[\S\s][^'\\]*)*)'|"([^"\\]*(?:\\[\S\s][^"\\]*)*)"|"([^""]*(?:"[\S\s][^""]*)*)"|([^,'"\s\\]*(?:\s+[^,'"\s\\]+)*))\s*(?:,|$)/g;
const values = [];
text.replace(csvExp, (m0, m1, m2, m3, m4) => {
if (m1 !== undefined) {
values.push(m1.replace(/\\'/g, "'"));
}
else if (m2 !== undefined) {
values.push(m2.replace(/\\"/g, '"'));
}
else if (m3 !== undefined) {
values.push(m3.replace(/""/g, '"'));
}
else if (m4 !== undefined) {
values.push(m4);
}
return '';
});
if (/,\s*$/.test(text)) {
values.push('');
}
return values;
}

While reading the CSV file into a string, it contains null values in between strings, so try it with \0 line by line. It works for me.
stringLine = stringLine.replace(/\0/g, "" );

Try this one.
function parseCSV(csv) {
let quotes = [];
let token = /(?:(['"`])([\s\S]*?)\1)|([^\t,\r\n]+)\3?|([\r\n])/gm;
let text = csv.replace(/\\?(['"`])\1?/gm, s => s.length != 2 ? s : `_r#${quotes.push(s) - 1}`);
return [...text.matchAll(token)]
.map(t => (t[2] || t[3] || t[4])
.replace(/^_r#\d+$/, "")
.replace(/_r#\d+/g, q => quotes[q.replace(/\D+/, '')][1]))
.reduce((a, b) => /^[\r\n]$/g.test(b)
? a.push([]) && a
: a[a.length - 1].push(b) && a, [[]])
.filter(d => d.length);
}

Use the npm library csv-string to parse the strings instead of split: https://www.npmjs.com/package/csv-string
This will handle the comma in quotes and empty entries

This one is based on niry's answer but for semicolon:
'use strict';
function csvToArray(text) {
let p = '', row = [''], ret = [row], i = 0, r = 0, s = !0, l;
for (l of text) {
if ('"' === l) {
if (s && l === p) row[i] += l;
s = !s;
} else if (';' === l && s) l = row[++i] = '';
else if ('\n' === l && s) {
if ('\r' === p) row[i] = row[i].slice(0, -1);
row = ret[++r] = [l = '']; i = 0;
} else row[i] += l;
p = l;
}
return ret;
};
let test = '"one";"two with escaped """" double quotes""";"three; with; commas";four with no quotes;"five with CRLF\r\n"\r\n"2nd line one";"two with escaped """" double quotes""";"three, with; commas and semicolons";four with no quotes;"five with CRLF\r\n"';
console.log(csvToArray(test));

Aside from the excellent and complete answer from ridgerunner, I thought of a very simple workaround for when your backend runs PHP.
Add this PHP file to your domain's backend (say: csv.php)
<?php
session_start(); // Optional
header("content-type: text/xml");
header("charset=UTF-8");
// Set the delimiter and the End of Line character of your CSV content:
echo json_encode(array_map('str_getcsv', str_getcsv($_POST["csv"], "\n")));
?>
Now add this function to your JavaScript toolkit (should be revised a bit to make crossbrowser I believe).
function csvToArray(csv) {
var oXhr = new XMLHttpRequest;
oXhr.addEventListener("readystatechange",
function () {
if (this.readyState == 4 && this.status == 200) {
console.log(this.responseText);
console.log(JSON.parse(this.responseText));
}
}
);
oXhr.open("POST","path/to/csv.php",true);
oXhr.setRequestHeader("Content-type", "application/x-www-form-urlencoded; charset=utf-8");
oXhr.send("csv=" + encodeURIComponent(csv));
}
It will cost you one Ajax call, but at least you won't duplicate code nor include any external library.
Ref: http://php.net/manual/en/function.str-getcsv.php

You can use papaparse.js like the example below:
<!DOCTYPE html>
<html lang="en">
<head>
<title>CSV</title>
</head>
<body>
<input type="file" id="files" multiple="">
<button onclick="csvGetter()">CSV Getter</button>
<h3>The Result will be in the Console.</h3>
<script src="papaparse.min.js"></script>
<script>
function csvGetter() {
var file = document.getElementById('files').files[0];
Papa.parse(file, {
complete: function(results) {
console.log(results.data);
}
});
}
</script>
</body>
</html>
Don't forget to include papaparse.js in the same folder.

According to this blog post, this function should do it:
String.prototype.splitCSV = function(sep) {
for (var foo = this.split(sep = sep || ","), x = foo.length - 1, tl; x >= 0; x--) {
if (foo[x].replace(/'\s+$/, "'").charAt(foo[x].length - 1) == "'") {
if ((tl = foo[x].replace(/^\s+'/, "'")).length > 1 && tl.charAt(0) == "'") {
foo[x] = foo[x].replace(/^\s*'|'\s*$/g, '').replace(/''/g, "'");
} else if (x) {
foo.splice(x - 1, 2, [foo[x - 1], foo[x]].join(sep));
} else foo = foo.shift().split(sep).concat(foo);
} else foo[x].replace(/''/g, "'");
} return foo;
};
You would call it like so:
var string = "'string, duppi, du', 23, lala";
var parsed = string.splitCSV();
alert(parsed.join("|"));
This jsfiddle kind of works, but it looks like some of the elements have spaces before them.

String replace module definition

Below I'm trying to replace the moduleName string with another string replacementModule.
var replacementModule = 'lodash.string' // cheeky
var moduleName = 'underscore.string'
var pattern = new RegExp('^' + moduleName + '(.+)?')
var match = definition.match(pattern)
var outcome = replacementModule + match[1]
However right now a totally different module is matched as well.
underscore.string.f/utils // desired no change
underscore.string.f // desired no change
underscore.string // => lodash.string
underscore.string/utils // => lodash.string/utils
How can I match to the /, and how the outcome that I expect?

You need to do at least 3 things:
Escape the string variable passed to the RegExp
Check if match is null before using it
The regex should contain ($|/.*) as capturing group 1 to match either an end of string or / followed by 0 or more characters.
RegExp.escape = function(s) {
return s.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
};
function runRepl(definition, replacementModule, moduleName) {
var pattern = RegExp('^' + RegExp.escape(moduleName) + '($|/.*)');
// ^------------^ ^------^
var match = definition.match(pattern);
if (match !== null) { // Check if we have a match
var outcome = replacementModule + match[1];
document.write(outcome + "<br/>");
}
else {
document.write("N/A<br/>");
}
}
runRepl("underscore.string.f/utils", "lodash.string", "underscore.string");
runRepl("underscore.string.f", "lodash.string", "underscore.string");
runRepl("underscore.string", "lodash.string", "underscore.string");
runRepl("underscore.string/utils", "lodash.string", "underscore.string");
Escaping is necessary to match a literal . inside moduleName and ($|/)(.+)? presumes there can be something after an end of string. Also, (.+)? (1 or more characters) is actually the same as .* which is shorter and easier to read.

Replace underscores with spaces and capitalize words

I am attempting to create a way to convert text with lowercase letters and underscores into text without underscores and the first letter of each word is capitalized.
ex;
options_page = Options Page
At this page: How to make first character uppercase of all words in JavaScript?
I found this regex:
key = key.replace(/(?:_| |\b)(\w)/g, function(key, p1) { return p1.toUpperCase()});
This does everything except replace the underscores with spaces. I have not really tried anything because I am not that familiar with regexpressions.
How can I adjust this regex so it replaces underscores with spaces?

This should do the trick:
function humanize(str) {
var i, frags = str.split('_');
for (i=0; i<frags.length; i++) {
frags[i] = frags[i].charAt(0).toUpperCase() + frags[i].slice(1);
}
return frags.join(' ');
}
console.log(humanize('humpdey_dumpdey'));
// > Humpdey Dumpdey
repl
http://repl.it/OnE
Fiddle:
http://jsfiddle.net/marionebl/nf4NG/
jsPerf:
Most test data: http://jsperf.com/string-transformations
All versions plus _.str: http://jsperf.com/string-transformations/3

Since Lodash 3.1.0, there's a _.startCase([string='']) method that transforms any case into capitalized words (start case):
_.startCase('hello_world'); // returns 'Hello World'
_.startCase('hello-world'); // returns 'Hello World'
_.startCase('hello world'); // returns 'Hello World'
There are other useful methods in the String section of Lodash. Read the documentation here.

These are two different tasks, so two different regexes is the best solution:
key = key.replace(/_/g, ' ').replace(/(?: |\b)(\w)/g, function(key) { return key.toUpperCase()});
To ensure even all capital words is processed. You can add .toLowerCase() before the very first .replace:
console.log('TESTING_WORD'.toLowerCase().replace(/_/g, ' ')
.replace(/(?: |\b)(\w)/g, function(key, p1) {
return key.toUpperCase();
}));

Simply add .replace('_',' ')
Like this
function toCamel(string){
return string.replace(/(?:_| |\b)(\w)/g, function($1){return $1.toUpperCase().replace('_',' ');});
}

Another alternative:
camel = "options_page".replace(/(^|_)(\w)/g, function ($0, $1, $2) {
return ($1 && ' ') + $2.toUpperCase();
});
console.log(camel);
The regular expression:
(^|_) beginning of the input OR "_" ($1)
(\w) a word character (short for [a-zA-Z0-9_]) ($2)
g all occurrences (global)
More about regular expressions : http://www.javascriptkit.com/javatutors/redev.shtml.

Here:
var str = 'Lorem_ipsum_dolor_sit_amet,_consectetur____adipiscing_elit.'
str = str.replace(/_{1,}/g,' ').replace(/(\s{1,}|\b)(\w)/g, function(m, space, letter)
{
return space + letter.toUpperCase();
})
console.log(str);

Converting any string into camel case

How can I convert a string into camel case using javascript regex?
EquipmentClass name or
Equipment className or equipment class name or Equipment Class Name
should all become: equipmentClassName.

Looking at your code, you can achieve it with only two replace calls:
function camelize(str) {
return str.replace(/(?:^\w|[A-Z]|\b\w)/g, function(word, index) {
return index === 0 ? word.toLowerCase() : word.toUpperCase();
}).replace(/\s+/g, '');
}
camelize("EquipmentClass name");
camelize("Equipment className");
camelize("equipment class name");
camelize("Equipment Class Name");
// all output "equipmentClassName"
Edit: Or in with a single replace call, capturing the white spaces also in the RegExp.
function camelize(str) {
return str.replace(/(?:^\w|[A-Z]|\b\w|\s+)/g, function(match, index) {
if (+match === 0) return ""; // or if (/\s+/.test(match)) for white spaces
return index === 0 ? match.toLowerCase() : match.toUpperCase();
});
}

If anyone is using lodash, there is a _.camelCase() function.
_.camelCase('Foo Bar');
// → 'fooBar'
_.camelCase('--foo-bar--');
// → 'fooBar'
_.camelCase('__FOO_BAR__');
// → 'fooBar'

To get camelCase
ES5
var camalize = function camalize(str) {
return str.toLowerCase().replace(/[^a-zA-Z0-9]+(.)/g, function(match, chr)
{
return chr.toUpperCase();
});
}
ES6
var camalize = function camalize(str) {
return str.toLowerCase().replace(/[^a-zA-Z0-9]+(.)/g, (m, chr) => chr.toUpperCase());
}
> To get ***C**amel**S**entence**C**ase* or ***P**ascal**C**ase*
var camelSentence = function camelSentence(str) {
return (" " + str).toLowerCase().replace(/[^a-zA-Z0-9]+(.)/g, function(match, chr)
{
return chr.toUpperCase();
});
}
Note :
For those language with accents. Do include À-ÖØ-öø-ÿ with the regex as following
.replace(/[^a-zA-ZÀ-ÖØ-öø-ÿ0-9]+(.)/g This is only for one language. For another language, you have to search and find

I just ended up doing this:
String.prototype.toCamelCase = function(str) {
return str
.replace(/\s(.)/g, function($1) { return $1.toUpperCase(); })
.replace(/\s/g, '')
.replace(/^(.)/, function($1) { return $1.toLowerCase(); });
}
I was trying to avoid chaining together multiple replace statements. Something where I'd have $1, $2, $3 in my function. But that type of grouping is hard to understand, and your mention about cross browser problems is something I never thought about as well.

You can use this solution :
function toCamelCase(str){
return str.split(' ').map(function(word,index){
// If it is the first word make sure to lowercase all the chars.
if(index == 0){
return word.toLowerCase();
}
// If it is not the first word only upper case the first char and lowercase the rest.
return word.charAt(0).toUpperCase() + word.slice(1).toLowerCase();
}).join('');
}

In Scott’s specific case I’d go with something like:
String.prototype.toCamelCase = function() {
return this.replace(/^([A-Z])|\s(\w)/g, function(match, p1, p2, offset) {
if (p2) return p2.toUpperCase();
return p1.toLowerCase();
});
};
'EquipmentClass name'.toCamelCase() // -> equipmentClassName
'Equipment className'.toCamelCase() // -> equipmentClassName
'equipment class name'.toCamelCase() // -> equipmentClassName
'Equipment Class Name'.toCamelCase() // -> equipmentClassName
The regex will match the first character if it starts with a capital letter, and any alphabetic character following a space, i.e. 2 or 3 times in the specified strings.
By spicing up the regex to /^([A-Z])|[\s-_](\w)/g it will also camelize hyphen and underscore type names.
'hyphen-name-format'.toCamelCase() // -> hyphenNameFormat
'underscore_name_format'.toCamelCase() // -> underscoreNameFormat

Reliable, high-performance example:
function camelize(text) {
const a = text.toLowerCase()
.replace(/[-_\s.]+(.)?/g, (_, c) => c ? c.toUpperCase() : '');
return a.substring(0, 1).toLowerCase() + a.substring(1);
}
Case-changing characters:
hyphen -
underscore _
period .
space

function toCamelCase(str) {
// Lower cases the string
return str.toLowerCase()
// Replaces any - or _ characters with a space
.replace( /[-_]+/g, ' ')
// Removes any non alphanumeric characters
.replace( /[^\w\s]/g, '')
// Uppercases the first character in each group immediately following a space
// (delimited by spaces)
.replace( / (.)/g, function($1) { return $1.toUpperCase(); })
// Removes spaces
.replace( / /g, '' );
}
I was trying to find a JavaScript function to camelCase a string, and wanted to make sure special characters would be removed (and I had trouble understanding what some of the answers above were doing). This is based on c c young's answer, with added comments and the removal of $peci&l characters.

If regexp isn't required, you might want to look at following code I made a long time ago for Twinkle:
String.prototype.toUpperCaseFirstChar = function() {
return this.substr( 0, 1 ).toUpperCase() + this.substr( 1 );
}
String.prototype.toLowerCaseFirstChar = function() {
return this.substr( 0, 1 ).toLowerCase() + this.substr( 1 );
}
String.prototype.toUpperCaseEachWord = function( delim ) {
delim = delim ? delim : ' ';
return this.split( delim ).map( function(v) { return v.toUpperCaseFirstChar() } ).join( delim );
}
String.prototype.toLowerCaseEachWord = function( delim ) {
delim = delim ? delim : ' ';
return this.split( delim ).map( function(v) { return v.toLowerCaseFirstChar() } ).join( delim );
}
I haven't made any performance tests, and regexp versions might or might not be faster.

My ES6 approach:
const camelCase = str => {
let string = str.toLowerCase().replace(/[^A-Za-z0-9]/g, ' ').split(' ')
.reduce((result, word) => result + capitalize(word.toLowerCase()))
return string.charAt(0).toLowerCase() + string.slice(1)
}
const capitalize = str => str.charAt(0).toUpperCase() + str.toLowerCase().slice(1)
let baz = 'foo bar'
let camel = camelCase(baz)
console.log(camel) // "fooBar"
camelCase('foo bar') // "fooBar"
camelCase('FOO BAR') // "fooBar"
camelCase('x nN foo bar') // "xNnFooBar"
camelCase('!--foo-¿?-bar--121-**%') // "fooBar121"

This function by pass cammelcase such these tests
Foo Bar
--foo-bar--
__FOO_BAR__-
foo123Bar
foo_Bar
function toCamelCase(str)
{
var arr= str.match(/[a-z]+|\d+/gi);
return arr.map((m,i)=>{
let low = m.toLowerCase();
if (i!=0){
low = low.split('').map((s,k)=>k==0?s.toUpperCase():s).join``
}
return low;
}).join``;
}
console.log(toCamelCase('Foo Bar'));
console.log(toCamelCase('--foo-bar--'));
console.log(toCamelCase('__FOO_BAR__-'));
console.log(toCamelCase('foo123Bar'));
console.log(toCamelCase('foo_Bar'));
console.log(toCamelCase('EquipmentClass name'));
console.log(toCamelCase('Equipment className'));
console.log(toCamelCase('equipment class name'));
console.log(toCamelCase('Equipment Class Name'));

Here is a one liner doing the work:
const camelCaseIt = string => string.toLowerCase().trim().split(/[.\-_\s]/g).reduce((string, word) => string + word[0].toUpperCase() + word.slice(1));
It splits the lower-cased string based on the list of characters provided in the RegExp [.\-_\s] (add more inside the []!) and returns a word array . Then, it reduces the array of strings to one concatenated string of words with uppercased first letters. Because the reduce has no initial value, it will start uppercasing first letters starting with the second word.
If you want PascalCase, just add an initial empty string ,'') to the reduce method.

The top answer is terse but it doesn't handle all edge cases. For anyone needing a more robust utility, without any external dependencies:
function camelCase(str) {
return (str.slice(0, 1).toLowerCase() + str.slice(1))
.replace(/([-_ ]){1,}/g, ' ')
.split(/[-_ ]/)
.reduce((cur, acc) => {
return cur + acc[0].toUpperCase() + acc.substring(1);
});
}
function sepCase(str, sep = '-') {
return str
.replace(/[A-Z]/g, (letter, index) => {
const lcLet = letter.toLowerCase();
return index ? sep + lcLet : lcLet;
})
.replace(/([-_ ]){1,}/g, sep)
}
// All will return 'fooBarBaz'
console.log(camelCase('foo_bar_baz'))
console.log(camelCase('foo-bar-baz'))
console.log(camelCase('foo_bar--baz'))
console.log(camelCase('FooBar Baz'))
console.log(camelCase('FooBarBaz'))
console.log(camelCase('fooBarBaz'))
// All will return 'foo-bar-baz'
console.log(sepCase('fooBarBaz'));
console.log(sepCase('FooBarBaz'));
console.log(sepCase('foo-bar-baz'));
console.log(sepCase('foo_bar_baz'));
console.log(sepCase('foo___ bar -baz'));
console.log(sepCase('foo-bar-baz'));
// All will return 'foo__bar__baz'
console.log(sepCase('fooBarBaz', '__'));
console.log(sepCase('foo-bar-baz', '__'));
Demo here: https://codesandbox.io/embed/admiring-field-dnm4r?fontsize=14&hidenavigation=1&theme=dark

lodash can do the trick sure and well:
var _ = require('lodash');
var result = _.camelCase('toto-ce héros')
// result now contains "totoCeHeros"
Although lodash may be a "big" library (~4kB), it contains a lot of functions that you'd normally use a snippet for, or build yourself.

return "hello world".toLowerCase().replace(/(?:(^.)|(\s+.))/g, function(match) {
return match.charAt(match.length-1).toUpperCase();
}); // HelloWorld

Because this question needed yet another answer...
I tried several of the previous solutions, and all of them had one flaw or another. Some didn't remove punctuation; some didn't handle cases with numbers; some didn't handle multiple punctuations in a row.
None of them handled a string like a1 2b. There's no explicitly defined convention for this case, but some other stackoverflow questions suggested separating the numbers with an underscore.
I doubt this is the most performant answer (three regex passes through the string, rather than one or two), but it passes all the tests I can think of. To be honest, though, I really can't imagine a case where you're doing so many camel-case conversions that performance would matter.
(I added this as an npm package. It also includes an optional boolean parameter to return Pascal Case instead of Camel Case.)
const underscoreRegex = /(?:[^\w\s]|_)+/g,
sandwichNumberRegex = /(\d)\s+(?=\d)/g,
camelCaseRegex = /(?:^\s*\w|\b\w|\W+)/g;
String.prototype.toCamelCase = function() {
if (/^\s*_[\s_]*$/g.test(this)) {
return '_';
}
return this.replace(underscoreRegex, ' ')
.replace(sandwichNumberRegex, '$1_')
.replace(camelCaseRegex, function(match, index) {
if (/^\W+$/.test(match)) {
return '';
}
return index == 0 ? match.trimLeft().toLowerCase() : match.toUpperCase();
});
}
Test cases (Jest)
test('Basic strings', () => {
expect(''.toCamelCase()).toBe('');
expect('A B C'.toCamelCase()).toBe('aBC');
expect('aB c'.toCamelCase()).toBe('aBC');
expect('abc def'.toCamelCase()).toBe('abcDef');
expect('abc__ _ _def'.toCamelCase()).toBe('abcDef');
expect('abc__ _ d_ e _ _fg'.toCamelCase()).toBe('abcDEFg');
});
test('Basic strings with punctuation', () => {
expect(`a'b--d -- f.h`.toCamelCase()).toBe('aBDFH');
expect(`...a...def`.toCamelCase()).toBe('aDef');
});
test('Strings with numbers', () => {
expect('12 3 4 5'.toCamelCase()).toBe('12_3_4_5');
expect('12 3 abc'.toCamelCase()).toBe('12_3Abc');
expect('ab2c'.toCamelCase()).toBe('ab2c');
expect('1abc'.toCamelCase()).toBe('1abc');
expect('1Abc'.toCamelCase()).toBe('1Abc');
expect('abc 2def'.toCamelCase()).toBe('abc2def');
expect('abc-2def'.toCamelCase()).toBe('abc2def');
expect('abc_2def'.toCamelCase()).toBe('abc2def');
expect('abc1_2def'.toCamelCase()).toBe('abc1_2def');
expect('abc1 2def'.toCamelCase()).toBe('abc1_2def');
expect('abc1 2 3def'.toCamelCase()).toBe('abc1_2_3def');
});
test('Oddball cases', () => {
expect('_'.toCamelCase()).toBe('_');
expect('__'.toCamelCase()).toBe('_');
expect('_ _'.toCamelCase()).toBe('_');
expect('\t_ _\n'.toCamelCase()).toBe('_');
expect('_a_'.toCamelCase()).toBe('a');
expect('\''.toCamelCase()).toBe('');
expect(`\tab\tcd`.toCamelCase()).toBe('abCd');
expect(`
ab\tcd\r
-_
|'ef`.toCamelCase()).toBe(`abCdEf`);
});

To effectively create a function that converts the casing of a string to camel-case, the function will also need to convert each string to lower-case first, before transforming the casing of the first character of non-first strings to an uppercase letter.
My example string is:
"text That I WaNt to make cAMEL case"
Many other solutions provided to this question return this:
"textThatIWaNtToMakeCAMELCase"
What I believe should be the expected, desired output would be this though, where all the mid-string uppercase characters are first transformed to be lowercase:
"textThatIWanrtToMakeCamelCase"
This can be done WITHOUT using any replace() method calls, by utilizing the String.prototype.split(), Array.prototype.map(), and Array.prototype.join() methods:
≤ ES5 Version
function makeCamelCase(str) {
return str
.split(' ')
.map((e,i) => i
? e.charAt(0).toUpperCase() + e.slice(1).toLowerCase()
: e.toLowerCase()
)
.join('')
}
makeCamelCase("text That I WaNt to make cAMEL case")
// -> "textThatIWanrtToMakeCamelCase" ✅
I'll break down what each line does, and then provide the same solution in two other formats— ES6 and as a String.prototype method, though I'd advise against extending built-in JavaScript prototypes directly like this.
Explainer
function makeCamelCase(str) {
return str
// split string into array of different words by splitting at spaces
.split(' ')
// map array of words into two different cases, one for the first word (`i == false`) and one for all other words in the array (where `i == true`). `i` is a parameter that denotes the current index of the array item being evaluated. Because indexes start at `0` and `0` is a "falsy" value, we can use the false/else case of this ternary expression to match the first string where `i === 0`.
.map((e,i) => i
// for all non-first words, use a capitalized form of the first character + the lowercase version of the rest of the word (excluding the first character using the slice() method)
? e.charAt(0).toUpperCase() + e.slice(1).toLowerCase()
// for the first word, we convert the entire word to lowercase
: e.toLowerCase()
)
// finally, we join the different strings back together into a single string without spaces, our camel-cased string
.join('')
}
makeCamelCase("text That I WaNt to make cAMEL case")
// -> "textThatIWanrtToMakeCamelCase" ✅
Condensed ES6+ (One-Liner) Version
const makeCamelCase = str => str.split(' ').map((e,i) => i ? e.charAt(0).toUpperCase() + e.slice(1).toLowerCase() : e.toLowerCase()).join('')
makeCamelCase("text That I WaNt to make cAMEL case")
// -> "textThatIWanrtToMakeCamelCase" ✅
String.prototype method version
String.prototype.toCamelCase = function() {
return this
.split(' ')
.map((e,i) => i
? e.charAt(0).toUpperCase() + e.slice(1).toLowerCase()
: e.toLowerCase()
)
.join('')
}
"text That I WaNt to make cAMEL case".toCamelCase()
// -> "textThatIWanrtToMakeCamelCase" ✅

little modified Scott's answer:
toCamelCase = (string) ->
string
.replace /[\s|_|-](.)/g, ($1) -> $1.toUpperCase()
.replace /[\s|_|-]/g, ''
.replace /^(.)/, ($1) -> $1.toLowerCase()
now it replaces '-' and '_' too.

All 14 permutations below produce the same result of "equipmentClassName".
String.prototype.toCamelCase = function() {
return this.replace(/[^a-z ]/ig, '') // Replace everything but letters and spaces.
.replace(/(?:^\w|[A-Z]|\b\w|\s+)/g, // Find non-words, uppercase letters, leading-word letters, and multiple spaces.
function(match, index) {
return +match === 0 ? "" : match[index === 0 ? 'toLowerCase' : 'toUpperCase']();
});
}
String.toCamelCase = function(str) {
return str.toCamelCase();
}
var testCases = [
"equipment class name",
"equipment class Name",
"equipment Class name",
"equipment Class Name",
"Equipment class name",
"Equipment class Name",
"Equipment Class name",
"Equipment Class Name",
"equipment className",
"equipment ClassName",
"Equipment ClassName",
"equipmentClass name",
"equipmentClass Name",
"EquipmentClass Name"
];
for (var i = 0; i < testCases.length; i++) {
console.log(testCases[i].toCamelCase());
};

you can use this solution:
String.prototype.toCamelCase = function(){
return this.replace(/\s(\w)/ig, function(all, letter){return letter.toUpperCase();})
.replace(/(^\w)/, function($1){return $1.toLowerCase()});
};
console.log('Equipment className'.toCamelCase());

Here's my suggestion:
function toCamelCase(string) {
return `${string}`
.replace(new RegExp(/[-_]+/, 'g'), ' ')
.replace(new RegExp(/[^\w\s]/, 'g'), '')
.replace(
new RegExp(/\s+(.)(\w+)/, 'g'),
($1, $2, $3) => `${$2.toUpperCase() + $3.toLowerCase()}`
)
.replace(new RegExp(/\s/, 'g'), '')
.replace(new RegExp(/\w/), s => s.toLowerCase());
}
or
String.prototype.toCamelCase = function() {
return this
.replace(new RegExp(/[-_]+/, 'g'), ' ')
.replace(new RegExp(/[^\w\s]/, 'g'), '')
.replace(
new RegExp(/\s+(.)(\w+)/, 'g'),
($1, $2, $3) => `${$2.toUpperCase() + $3.toLowerCase()}`
)
.replace(new RegExp(/\s/, 'g'), '')
.replace(new RegExp(/\w/), s => s.toLowerCase());
};
Test cases:
describe('String to camel case', function() {
it('should return a camel cased string', function() {
chai.assert.equal(toCamelCase('foo bar'), 'fooBar');
chai.assert.equal(toCamelCase('Foo Bar'), 'fooBar');
chai.assert.equal(toCamelCase('fooBar'), 'fooBar');
chai.assert.equal(toCamelCase('FooBar'), 'fooBar');
chai.assert.equal(toCamelCase('--foo-bar--'), 'fooBar');
chai.assert.equal(toCamelCase('__FOO_BAR__'), 'fooBar');
chai.assert.equal(toCamelCase('!--foo-¿?-bar--121-**%'), 'fooBar121');
});
});

following #Scott's readable approach, a little bit of fine tuning
// convert any string to camelCase
var toCamelCase = function(str) {
return str.toLowerCase()
.replace( /['"]/g, '' )
.replace( /\W+/g, ' ' )
.replace( / (.)/g, function($1) { return $1.toUpperCase(); })
.replace( / /g, '' );
}

There is my solution:
const toCamelWord = (word, idx) =>
idx === 0 ?
word.toLowerCase() :
word.charAt(0).toUpperCase() + word.slice(1).toLowerCase();
const toCamelCase = text =>
text
.split(/[_-\s]+/)
.map(toCamelWord)
.join("");
console.log(toCamelCase('User ID'))

This method seems to outperform most answers on here, it's a little bit hacky though, no replaces, no regex, simply building up a new string that's camelCase.
String.prototype.camelCase = function(){
var newString = '';
var lastEditedIndex;
for (var i = 0; i < this.length; i++){
if(this[i] == ' ' || this[i] == '-' || this[i] == '_'){
newString += this[i+1].toUpperCase();
lastEditedIndex = i+1;
}
else if(lastEditedIndex !== i) newString += this[i].toLowerCase();
}
return newString;
}

This builds on the answer by CMS by removing any non-alphabetic characters including underscores, which \w does not remove.
function toLowerCamelCase(str) {
return str.replace(/[^A-Za-z0-9]/g, ' ').replace(/^\w|[A-Z]|\b\w|\s+/g, function (match, index) {
if (+match === 0 || match === '-' || match === '.' ) {
return ""; // or if (/\s+/.test(match)) for white spaces
}
return index === 0 ? match.toLowerCase() : match.toUpperCase();
});
}
toLowerCamelCase("EquipmentClass name");
toLowerCamelCase("Equipment className");
toLowerCamelCase("equipment class name");
toLowerCamelCase("Equipment Class Name");
toLowerCamelCase("Equipment-Class-Name");
toLowerCamelCase("Equipment_Class_Name");
toLowerCamelCase("Equipment.Class.Name");
toLowerCamelCase("Equipment/Class/Name");
// All output e

Upper camel case ("TestString") to lower camel case ("testString") without using regex (let's face it, regex is evil):
'TestString'.split('').reduce((t, v, k) => t + (k === 0 ? v.toLowerCase() : v), '');

I ended up crafting a slightly more aggressive solution:
function toCamelCase(str) {
const [first, ...acc] = str.replace(/[^\w\d]/g, ' ').split(/\s+/);
return first.toLowerCase() + acc.map(x => x.charAt(0).toUpperCase()
+ x.slice(1).toLowerCase()).join('');
}
This one, above, will remove all non-alphanumeric characters and lowercase parts of words that would otherwise remain uppercased, e.g.
Size (comparative) => sizeComparative
GDP (official exchange rate) => gdpOfficialExchangeRate
hello => hello

function convertStringToCamelCase(str){
return str.split(' ').map(function(item, index){
return index !== 0
? item.charAt(0).toUpperCase() + item.substr(1)
: item.charAt(0).toLowerCase() + item.substr(1);
}).join('');
}

I know this is an old answer, but this handles both whitespace and _ (lodash)
function toCamelCase(s){
return s
.replace(/_/g, " ")
.replace(/\s(.)/g, function($1) { return $1.toUpperCase(); })
.replace(/\s/g, '')
.replace(/^(.)/, function($1) { return $1.toLowerCase(); });
}
console.log(toCamelCase("Hello world");
console.log(toCamelCase("Hello_world");
// Both print "helloWorld"

const toCamelCase = str =>
str
.replace(/[^a-zA-Z0-9]+(.)/g, (m, chr) => chr.toUpperCase())
.replace(/^\w/, c => c.toLowerCase());

We Keep Coding

JavaScript is the programming language of the Web.

Match exact word with new lines anywhere using JavaScript - javascript

Related

Capitalize first letter of each word in JS

How to separate the values of a line of .csv file which contains commas in data? [duplicate]

String replace module definition

Replace underscores with spaces and capitalize words

Converting any string into camel case

Categories

Resources