Split string by spaces and double quotes in Javascript

Split string by spaces and double quotes in Javascript - javascript

I am trying to split a string in Javascript by spaces and double quotes("), but with a special condition: two quotes match only if the first is preceded by whitespace/start of string and the second is followed by whitespace or end of string.
Some examples:
"a "b cd" becomes ['a "b cd']
"a b" cd" becomes ['a b', 'cd"']
"a "b" "x"" cd" becomes ['a "b', 'x"', 'cd"']
Note that quotes which don't obey the above condition remain in their correspondent atoms.
I have a regex which splits by spaces and grouped quotes, but i can't quite figure out one that does what I said.
/(?:[^\s"]+|"[^"]*")+/g splits quotes 2 by 2, but that is no good.

I believe this should satisfy your requirements:-
((^|\s)".*?(\w|$)")

Here's a more or less manual solution:
/**
 * Splits a string into atoms separated by spaces, where a double-quoted run
 * counts as a single atom.
 *
 * A quote only opens a group when it is preceded by a space (or the start of
 * the string) AND a matching closing quote exists further on; a quote only
 * closes a group when it is followed by a space (or the end of the string).
 * Quotes that do not satisfy these rules stay inside their atom verbatim.
 *
 * @param {string} s - The text to split.
 * @returns {string[]} The atoms, with the delimiting quotes removed.
 */
function split(s) {
  // Pad both ends so "start of string" and "end of string" look like spaces.
  var padded = ' ' + s + ' ';
  var results = [];
  var chunk = '';
  var inQuotes = false;
  // Position of the last quote that is allowed to close a group. An opening
  // quote is only honoured when such a closer exists after it; otherwise an
  // unterminated quote would swallow the rest of the input.
  var lastCloser = padded.lastIndexOf('" ');

  for (var i = 0; i < padded.length; i++) {
    var ch = padded[i];
    var next = padded[i + 1];

    if (inQuotes) {
      if (ch === '"' && next === ' ') {
        // Closing quote: emit the group (possibly empty). The following space
        // is left for the next iteration so it can act as a separator.
        results.push(chunk);
        chunk = '';
        inQuotes = false;
      } else {
        chunk += ch;
      }
    } else if (ch === ' ') {
      // A space ends any pending unquoted atom.
      if (chunk.length !== 0) {
        results.push(chunk);
        chunk = '';
      }
      if (next === '"' && lastCloser > i + 1) {
        inQuotes = true;
        i++; // skip the opening quote itself
      }
    } else {
      chunk += ch;
    }
  }

  if (chunk.length !== 0) {
    results.push(chunk);
  }
  return results;
}
// Demo: run split() on the three examples from the question and write one
// "input => [ 'atom', ... ]" line per example into the page.
['"a "b cd"', '"a b" cd"', '"a "b" "x"" cd"'].forEach(function (input) {
  var atoms = split(input).join("', '");
  document.write(input + ' => ' + "[ '" + atoms + "' ]" + '</br>\n');
});
(There might be a better way to inspect javascript arrays than what I'm doing there with the join method.)

Related

Does the input string have a . or ? or ' or space?

Im writing a program that will tell you if the input string contains a period, question mark, colon or space. If it doesn't it'll return "false". If it does, it'll return all of the characters before the found punctuation. Ex. str= "grey cat", program will return "grey" because it was stopped by the space. Can I use "or" in Javascript? EDIT Im trying to do this without any built-in functions. Im not looking for an efficient way
update - now it's just printing the str. How do I get it to just print what comes before the punctuation (if any)
// Asker's attempt, kept as posted because it illustrates the problem being asked about.
// As written it never stops at the first punctuation character: it walks the whole
// string, skips every ".", "?" and "," and appends every other character to `result`.
function find_punctuation(str){
let result = "";
for (let i = 0; i < str.length; i ++ )
// `str[i] === ""` can never be true here: for i < str.length, str[i] is a
// one-character string, so a space (" ") is not detected by this test.
if (str[i] === "." || str[i] === "?"|| str[i] === "" || str[i] === ","){
// Empty branch: a matching character is simply not appended; the loop keeps going.
}
else result += str[i]
return result
}
console.log(find_punctuation('he. y'));

/**
 * Returns the leading part of `str` up to (not including) the first
 * ".", "?", " " or ",". If none of those characters occurs, the whole
 * string is returned.
 */
function find_punctuation(str) {
  let prefix = "";
  for (let pos = 0; pos < str.length; pos++) {
    const current = str[pos];
    switch (current) {
      // Any of these characters ends the scan immediately.
      case ".":
      case "?":
      case " ":
      case ",":
        return prefix;
      default:
        // Ordinary character: keep collecting.
        prefix += current;
    }
  }
  // Reached the end without meeting a stop character.
  return prefix;
}
console.log(find_punctuation('he. y'));
console.log(find_punctuation('hell,o'));
console.log(find_punctuation('hello'));
console.log(find_punctuation('cat?erpillar'));
console.log(find_punctuation('grey cat'));

Use a regular expression:
/**
 * Regex variant: returns the text before the first ".", "?", ":" or space,
 * or `false` when the string contains none of them.
 */
function find_punctuation(str) {
  // Lazy capture so the match stops at the FIRST stop character.
  const found = str.match(/^(.*?)[.?: ]/);
  return found ? found[1] : false;
}
console.log(find_punctuation('he.y'));
console.log(find_punctuation('hey'));

Try this:
/**
 * Collects characters from the start of `str` until a ".", "?" or space is
 * met, and returns what was collected (the whole string if none is met).
 */
function find_punctuation(str) {
  let collected = "";
  let pos = 0;
  while (pos < str.length) {
    const current = str[pos];
    if (current === "." || current === "?" || current === " ") {
      return collected;
    }
    collected += current;
    pos += 1;
  }
  return collected;
}
console.log(find_punctuation('he.y'));

How to match PHP's explode(';',$s,3) to s.split(';',3) in JavaScript?

If you run an explode in PHP with the resulting array length limited, it will append the remainder of the string to the last element. This is how exploding a string should behave, since nowhere in the split am I saying that I want to discard my data, just split it. This is how it works in PHP:
# Name;Date;Quote
$s = 'Mark Twain;1879-11-14;"We haven\'t all had the good fortune to be ladies; we haven\'t all been generals, or poets, or statesmen; but when the toast works down to the babies, we stand on common ground."';
$a = explode(';',$s,3);
var_dump($a);
array(3) {
[0]=>
string(10) "Mark Twain"
[1]=>
string(10) "1879-11-14"
[2]=>
string(177) ""We haven't all had the good fortune to be ladies; we haven't all been generals, or poets, or statesmen; but when the toast works down to the babies, we stand on common ground.""
}
However, if you run the same code in JavaScript:
> var s = 'Mark Twain;1879-11-14;"We haven\'t all had the good fortune to be ladies; we haven\'t all been generals, or poets, or statesmen; but when the toast works down to the babies, we stand on common ground."'
undefined
> var a = s.split(';',3);
undefined
> a
[ 'Mark Twain',
'1879-11-14',
'"We haven\'t all had the good fortune to be ladies' ]
This makes absolutely no sense, because the whole point of splitting a string is to treat the final portion of the string as a literal, instead of delimited. JavaScript's split with a limit is the exact same as:
# In PHP
$a = array_slice(explode(';',$s), 0, 3);
# Or in JavaScript
var a = s.split(';').slice(0, 3);
If the user in JavaScript only wanted to make use of the first two elements in this array, whether the array is split or not doesn't matter. The first two elements will always have the same value no matter what. The only element that changes, is the last element of the split array.
If the native split with limit method in JavaScript can be replicated using a slice, then what value does it provide?
But I digress, what is the most efficient way to replicate the explode functionality in PHP? Removing each element as a substring until the last element is reached, splitting the entire string and then concatenating the remaining elements, getting the location of the n - 1 delimiter and getting a substring of that, or any other solution I haven't thought of?

According to the documentation, the split function accepts two arguments:
string.split(separator, limit)
However, this still does not give the result you want, because:
The second parameter is an integer that specifies the number of
splits, items after the split limit will not be included in the array
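However, I noticed that every ';' inside the quoted text is followed by a space, while the field-separating ';' characters are not. So you could split with a regex that uses a negative lookahead.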
var s = 'Mark Twain;1879-11-14;"We haven\'t all had the good fortune to be ladies; we haven\'t all been generals, or poets, or statesmen; but when the toast works down to the babies, we stand on common ground."'
var a = s.split(/;(?! )/,3)
console.log(a);
The regex /;(?! )/ splits on every ';' except those that are immediately followed by a space.
Hope this helps!

Locutus.io has you covered: they have ported PHP's explode, and a great number of other PHP functions, to JavaScript.
usage:
$s = 'Mark Twain;1879-11-14;"We haven\'t all had the good fortune to be ladies; we haven\'t all been generals, or poets, or statesmen; but when the toast works down to the babies, we stand on common ground."';
"Mark Twain;1879-11-14;"We haven't all had the good fortune to be ladies; we haven't all been generals, or poets, or statesmen; but when the toast works down to the babies, we stand on common ground.""
$a = explode(';',$s,3);
content of $a as reported by Chrome's javascript console:
0: "Mark Twain"
1: "1879-11-14"
2: ""We haven't all had the good fortune to be ladies; we haven't all been generals, or poets, or statesmen; but when the toast works down to the babies, we stand on common ground.""
length: 3
, source: http://locutus.io/php/strings/explode/
function explode (delimiter, string, limit) {
  // discuss at: http://locutus.io/php/explode/
  // original by: Kevin van Zonneveld (http://kvz.io)
  // example 1: explode(' ', 'Kevin van Zonneveld')
  // returns 1: [ 'Kevin', 'van', 'Zonneveld' ]

  // Missing arguments -> null.
  if (arguments.length < 2 || delimiter === undefined || string === undefined) {
    return null
  }

  // Empty / false / null delimiter -> false.
  if (delimiter === '' || delimiter === false || delimiter === null) {
    return false
  }

  // Functions and objects (including a null string) are not splittable.
  var delimiterKind = typeof delimiter
  var stringKind = typeof string
  if (delimiterKind === 'function' || delimiterKind === 'object' ||
      stringKind === 'function' || stringKind === 'object') {
    return { 0: '' }
  }

  // `true` is treated as the string '1'; everything else is stringified as-is.
  var separator = (delimiter === true ? '1' : delimiter) + ''
  var subject = string + ''
  var pieces = subject.split(separator)

  if (typeof limit === 'undefined') return pieces

  // A limit of 0 behaves like 1.
  var cap = limit === 0 ? 1 : limit

  if (cap > 0) {
    // Positive limit: keep cap - 1 pieces and glue the remainder back together.
    if (cap >= pieces.length) return pieces
    var head = pieces.slice(0, cap - 1)
    var tail = pieces.slice(cap - 1).join(separator)
    return head.concat([tail])
  }

  // Negative limit: drop the last -cap pieces.
  if (-cap >= pieces.length) return []
  pieces.splice(pieces.length + cap)
  return pieces
}
edit: if you for some reason need/want a smaller implementation, here's 1 i made in response to the comments:
/**
 * Compact port of PHP's explode(): splits `string` on `delimiter`, and when a
 * limit is given the final element holds the un-split remainder.
 *
 * @param {string} delimiter - Separator to split on.
 * @param {string} string - Text to split.
 * @param {number} [limit] - Omitted: plain split. Positive: at most `limit`
 *   elements, the last one containing the rest of the string. Zero is treated
 *   as 1. Negative: all pieces except the last `-limit`.
 * @returns {string[]} The resulting pieces.
 */
function explode(delimiter, string, limit) {
  var spl = string.split(delimiter);
  // Without a limit the original code fell through and returned an empty
  // array; a plain split is the intended result.
  if (limit == null) {
    return spl;
  }
  var max = limit === 0 ? 1 : limit;
  if (max < 0) {
    return spl.slice(0, Math.max(spl.length + max, 0));
  }
  if (spl.length <= max) {
    return spl;
  }
  var ret = [], i = 0;
  for (; i < max; ++i) {
    ret.push(spl[i]);
  }
  // Re-attach everything past the limit to the last kept element.
  for (; i < spl.length; ++i) {
    ret[max - 1] += delimiter + spl[i];
  }
  return ret;
}

Alright, I created 4 alternative versions of the PHP split string algorithm, along with the two provided by @hanshenrik, and did a basic benchmark on them:
/**
 * explode() via repeated indexOf: cuts off up to `limit - 1` leading pieces
 * and returns the untouched remainder as the final element.
 *
 * @param {string} delimiter - Non-empty separator.
 * @param {string} str - Text to split.
 * @param {number} [limit] - Maximum number of elements; omitted means a plain
 *   split. Values below 1 behave like 1 (negative limits are not supported).
 * @returns {string[]} The resulting pieces.
 */
function explode1(delimiter, str, limit) {
  if (limit == null) {
    // The original read the global `s` here instead of the `str` argument.
    return str.split(delimiter);
  }
  var a = [];
  var start = 0;
  for (var i = 0; i < limit - 1; i++) {
    var index = str.indexOf(delimiter, start);
    if (index === -1) {
      // Fewer delimiters than the limit allows: stop, the rest is the last piece.
      break;
    }
    a.push(str.substring(start, index));
    // Advance by the real delimiter length so multi-character delimiters work.
    start = index + delimiter.length;
  }
  a.push(str.substring(start));
  return a;
}
/**
 * explode() via full split + slice + join.
 *
 * @param {string} delimiter - Separator.
 * @param {string} str - Text to split.
 * @param {number} [limit] - Maximum number of elements; omitted means a plain
 *   split. Values below 1 behave like 1 (negative limits are not supported).
 * @returns {string[]} The resulting pieces.
 */
function explode2(delimiter, str, limit) {
  // The original read the global `s` here instead of the `str` argument.
  var pieces = str.split(delimiter);
  if (limit == null) {
    return pieces;
  }
  var max = Math.max(limit, 1);
  // Nothing to merge: returning early avoids appending a spurious '' element.
  if (max >= pieces.length) {
    return pieces;
  }
  var ret = pieces.slice(0, max - 1);
  ret.push(pieces.slice(max - 1).join(delimiter));
  return ret;
}
/**
 * explode() via limited split for the head plus an indexOf walk to locate
 * where the remainder starts.
 *
 * @param {string} delimiter - Non-empty separator.
 * @param {string} str - Text to split.
 * @param {number} [limit] - Maximum number of elements; omitted means a plain
 *   split. Values below 1 behave like 1 (negative limits are not supported).
 * @returns {string[]} The resulting pieces.
 */
function explode3(delimiter, str, limit) {
  if (limit == null) {
    // The original read the global `s` throughout instead of `str`.
    return str.split(delimiter);
  }
  var max = Math.max(limit, 1);
  var a = str.split(delimiter, max - 1);
  var start = 0;
  for (var i = 0; i < max - 1; i++) {
    // Search from `start` (not start + 1) so a delimiter at index 0 is seen.
    var index = str.indexOf(delimiter, start);
    if (index === -1) {
      // Fewer pieces than the limit: the limited split already holds them all.
      return a;
    }
    start = index + delimiter.length;
  }
  a.push(str.substring(start));
  return a;
}
/**
 * explode() via limited split for the head; the remainder's start offset is
 * derived from the length of the re-joined head.
 *
 * @param {string} delimiter - Separator.
 * @param {string} str - Text to split.
 * @param {number} [limit] - Maximum number of elements; omitted means a plain
 *   split. Values below 1 behave like 1 (negative limits are not supported).
 * @returns {string[]} The resulting pieces.
 */
function explode4(delimiter, str, limit) {
  if (limit == null) {
    // The original read the global `s` here instead of the `str` argument.
    return str.split(delimiter);
  }
  var max = Math.max(limit, 1);
  if (max === 1) {
    // No head at all: the whole string is the single element.
    return [str];
  }
  var a = str.split(delimiter, max - 1);
  var consumed = a.join(delimiter).length;
  if (consumed >= str.length) {
    // The head already covers the entire string; there is no remainder.
    return a;
  }
  // Skip the delimiter that separates the head from the remainder.
  a.push(str.substring(consumed + delimiter.length));
  return a;
}
function explode5 (delimiter, string, limit) {
  // discuss at: http://locutus.io/php/explode/
  // original by: Kevin van Zonneveld (http://kvz.io)
  // example 1: explode(' ', 'Kevin van Zonneveld')
  // returns 1: [ 'Kevin', 'van', 'Zonneveld' ]

  // Missing arguments -> null.
  if (arguments.length < 2 || delimiter === undefined || string === undefined) {
    return null
  }

  // Empty / false / null delimiter -> false.
  if (delimiter === '' || delimiter === false || delimiter === null) {
    return false
  }

  // Functions and objects (including a null string) are not splittable.
  var delimiterKind = typeof delimiter
  var stringKind = typeof string
  if (delimiterKind === 'function' || delimiterKind === 'object' ||
      stringKind === 'function' || stringKind === 'object') {
    return { 0: '' }
  }

  // `true` is treated as the string '1'; everything else is stringified as-is.
  var separator = (delimiter === true ? '1' : delimiter) + ''
  var subject = string + ''
  var pieces = subject.split(separator)

  if (typeof limit === 'undefined') return pieces

  // A limit of 0 behaves like 1.
  var cap = limit === 0 ? 1 : limit

  if (cap > 0) {
    // Positive limit: keep cap - 1 pieces and glue the remainder back together.
    if (cap >= pieces.length) return pieces
    var head = pieces.slice(0, cap - 1)
    var tail = pieces.slice(cap - 1).join(separator)
    return head.concat([tail])
  }

  // Negative limit: drop the last -cap pieces.
  if (-cap >= pieces.length) return []
  pieces.splice(pieces.length + cap)
  return pieces
}
/**
 * Compact explode(): full split, then the pieces past the limit are appended
 * back onto the last kept element.
 *
 * @param {string} delimiter - Separator to split on.
 * @param {string} string - Text to split.
 * @param {number} [limit] - Omitted: plain split. Positive: at most `limit`
 *   elements, the last one containing the rest of the string. Zero is treated
 *   as 1. Negative: all pieces except the last `-limit`.
 * @returns {string[]} The resulting pieces.
 */
function explode6(delimiter, string, limit) {
  var spl = string.split(delimiter);
  // Without a limit the original code fell through and returned an empty
  // array; a plain split is the intended result.
  if (limit == null) {
    return spl;
  }
  var max = limit === 0 ? 1 : limit;
  if (max < 0) {
    return spl.slice(0, Math.max(spl.length + max, 0));
  }
  if (spl.length <= max) {
    return spl;
  }
  var ret = [], i = 0;
  for (; i < max; ++i) {
    ret.push(spl[i]);
  }
  // Re-attach everything past the limit to the last kept element.
  for (; i < spl.length; ++i) {
    ret[max - 1] += delimiter + spl[i];
  }
  return ret;
}
// Benchmark driver: times a single call of each explodeN variant on the same
// comma-delimited sample with a limit of 3, using console.time/console.timeEnd.
// Each measurement covers exactly one invocation, so the figures are coarse.
// The sample is stored in the global `s`.
var s = 'Mark Twain,1879-11-14,"We haven\'t all had the good fortune to be ladies; we haven\'t all been generals, or poets, or statesmen; but when the toast works down to the babies, we stand on common ground."'
console.log(s);
// Variant 1
console.time('explode1');
var a1 = explode1(',', s, 3);
//console.log(a1);
console.timeEnd('explode1');
// Variant 2
console.time('explode2');
var a2 = explode2(',', s, 3);
//console.log(a2);
console.timeEnd('explode2');
// Variant 3
console.time('explode3');
var a3 = explode3(',', s, 3);
//console.log(a3);
console.timeEnd('explode3');
// Variant 4
console.time('explode4');
var a4 = explode4(',', s, 3);
//console.log(a4);
console.timeEnd('explode4');
// Variant 5
console.time('explode5');
var a5 = explode5(',', s, 3);
//console.log(a5);
console.timeEnd('explode5');
// Variant 6
console.time('explode6');
var a6 = explode6(',', s, 3);
//console.log(a6);
console.timeEnd('explode6');
The two best-performing algorithms were explode4, principally, with explode3 a close second across multiple iterations of the benchmark:
$ node explode1.js && node explode2.js && node explode3.js && node
explode4.js && node explode5.js && node explode6.js
explode1: 0.200ms
explode2: 0.194ms
explode3: 0.147ms
explode4: 0.183ms
explode5: 0.341ms
explode6: 0.162ms
You can run your own benchmarks, but with my tests I can confirm that splitting an array by n - 1 and then getting an index from joining the resulting array is the fastest algorithm matching explode in PHP.
EDIT: It turns out that the garbage collector biased how each successive function was measured, so I split them off into their own individual files and re-ran the benchmarking a few times. It seems explode3 is the best performing, not explode4, but I won't make a decision that I'm not completely sure of.

Convert camel case to human readable string?

Is there a reg exp or function that will convert camel case, css and underscore to human readable format? It does not need to support non-humans at this time. Sorry aliens. :(
Examples:
helloWorld -> "Hello World"
hello-world -> "Hello World"
hello_world -> "Hello World"

Split by non-words; capitalize; join:
/**
 * Upper-cases the first character of `word` and leaves the rest untouched.
 */
function capitalize(word) {
  var first = word.slice(0, 1);
  var rest = word.slice(1);
  return first.toUpperCase() + rest;
}

/**
 * Pulls every "word" (a letter followed by lowercase letters) out of `name`,
 * capitalizes each one and joins them with single spaces. Returns "" when
 * the input contains no letters.
 */
function toCapitalizedWords(name) {
  var found = name.match(/[A-Za-z][a-z]*/g);
  if (!found) {
    return "";
  }
  var pretty = [];
  for (var i = 0; i < found.length; i++) {
    pretty.push(capitalize(found[i]));
  }
  return pretty.join(" ");
}

Extract all words with a regular expression. Capitalize them. Then, join them with spaces.
Example regexp:
/^[a-z]+|[A-Z][a-z]*/g
/ ^[a-z]+ // 1 or more lowercase letters at the beginning of the string
| // OR
[A-Z][a-z]* // a capital letter followed by zero or more lowercase letters
/g // global, match all instances
Example function:
/**
 * Converts camelCase, hyphenated or underscored identifiers into
 * space-separated, capitalized words.
 *
 * @param {string} str - Identifier such as "helloWorld" or "hello-world".
 * @returns {string} The words, each with an upper-case first letter and the
 *   rest lower-cased; "" when the input contains no letters.
 */
var camelCaseToWords = function (str) {
  // A word is one letter followed by any lowercase letters. Unlike the
  // original start-anchored pattern, this also finds lowercase words that
  // follow a "-" or "_".
  // match() returns null when nothing matches, hence the fallback.
  var words = str.match(/[A-Za-z][a-z]*/g) || [];
  return words.map(function (word) {
    return word[0].toUpperCase() + word.slice(1).toLowerCase();
  }).join(' ');
};
camelCaseToWords('camelCaseString');
// Camel Case String
camelCaseToWords('thisIsATest');
// This Is A Test

Here is the ActionScript version based on the idea from Rick's C example code. For a JavaScript version, remove the strong typing. For example, change var value:String to var value. Basically, remove any type annotation that starts with a colon (:String, :int, etc.).
/**
* Makes a camel-case, hyphenated or underscored name more readable.
* The first character is upper-cased, a space is inserted before every
* upper-case letter, and each "-" or "_" is replaced by a space.
* So helloWorld becomes "Hello World". Note that the letter following a
* "-" or "_" is copied unchanged, so hello-world and hello_world become
* "Hello world".
* */
public static function prettifyCamelCase(value:String=""):String {
var output:String = "";
var len:int = value.length;
var char:String;
// `i` has no initializer; NOTE(review): relies on an AS3 int defaulting to 0 - confirm.
for (var i:int;i<len;i++) {
char = value.charAt(i);
if (i==0) {
// First character: always upper-cased.
output += char.toUpperCase();
}
// True only for characters that have a distinct lower-case form, i.e. upper-case letters.
else if (char !== char.toLowerCase() && char === char.toUpperCase()) {
output += " " + char;
}
else if (char == "-" || char == "_") {
// Separator characters are replaced by a single space.
output += " ";
}
else {
output += char;
}
}
return output;
}
JavaScript version:
/**
 * Changes camel case to a human readable format. So helloWorld, hello-world
 * and hello_world all become "Hello World".
 *
 * Rules: the first character is upper-cased; a space is inserted before an
 * upper-case letter; "-" and "_" become a space and the character that
 * follows them is upper-cased (the original left it lower-case, producing
 * "Hello world").
 *
 * @param {string} str - The identifier to prettify.
 * @returns {string} The human readable text.
 */
function prettifyCamelCase(str) {
  var output = "";
  var len = str.length;
  // True right after a "-" or "_": the next character starts a new word.
  var startsWord = false;
  for (var i = 0; i < len; i++) {
    var ch = str.charAt(i);
    if (i !== 0 && (ch === "-" || ch === "_")) {
      output += " ";
      startsWord = true;
    } else if (i === 0 || startsWord) {
      output += ch.toUpperCase();
      startsWord = false;
    } else if (ch !== ch.toLowerCase() && ch === ch.toUpperCase()) {
      // An upper-case letter inside a word marks a camelCase boundary.
      output += " " + ch;
    } else {
      output += ch;
    }
  }
  return output;
}

You can use a replacement function for String.replace, e.g.
/**
 * Returns `s` with its first character upper-cased.
 * Throws on an empty string, because s[0] is then undefined.
 */
function capitalize(s) {
  var head = s[0].toUpperCase();
  var tail = s.slice(1);
  return head + tail;
}

/**
 * Replacement callback for String.prototype.replace: receives the prefix
 * (p1), the leading lowercase word (p2) and the following capital (p3), and
 * returns the prefix, the capitalized word, a space, then the capital.
 */
function replacer1(match, p1, p2, p3, offset, s) {
  var word = capitalize(p2);
  return p1 + word + ' ' + p3;
}

var s1 = "helloWorld";
var r1 = s1.replace(/(^|[^a-z])([a-z]+)([A-Z])/, replacer1);
console.log(r1);
hello-world and hello_world work similar.
See JSFiddle

I don't know if there is already a built in method to do this but you could loop through the string and every time you see a character that you want to split on do so.
In your case something like:
// First draft, kept as posted; the author states below that it is broken.
// Annotations mark what the code actually does.
my_str = 'helloWorld';
returnString = '';
for(var i = 0; i < my_str.length; i++) {
if(i == 0) {
// `my_str[i] + 32` concatenates the text "32" onto the character; it does not change its case.
returnString += (my_str[i] + 32); // capitalize the first character
}
// With `||` this condition holds for every character (each one is either > 'A' or < 'Z'),
// so the two branches below are never reached.
else if(my_str[i] > 'A' || my_str[i] < 'Z') {
returnString += ' ' + my_str[i]; // add a space
}
else if(my_str[i] == '-' || my_str[i] == '_') {
returnString += ' ';
}
else {
// `my_string` is not declared anywhere in this snippet (the variable is `my_str`).
returnString += my_string[i];
}
}
// A `return` here is outside of any function.
return returnString;
Edit:
After the numerous comments I have come to realize that I put up some broken code :P
Here is a tested version of it:
my_str = 'helloWorld';
/**
 * Turns a camelCase, hyphenated or underscored identifier into readable
 * words: "helloWorld", "hello-world" and "hello_world" all become
 * "Hello World".
 *
 * @param {string} str - The identifier to convert.
 * @returns {string} The readable text; "" for empty input.
 */
function readable(str) {
  // Guard: str[0] is undefined for an empty string.
  if (!str) {
    return '';
  }
  // Declared locally; the original assigned to an undeclared name and so
  // created a global (a ReferenceError in strict mode / ES modules).
  var returnString = str[0].toUpperCase();
  // Set after "-" or "_" so the next character starts a capitalized word.
  var capitalizeNext = false;
  for (var i = 1; i < str.length; i++) {
    var ch = str[i];
    if (ch == '-' || ch == '_') {
      returnString += ' ';
      capitalizeNext = true;
    }
    else if (capitalizeNext) {
      returnString += ch.toUpperCase();
      capitalizeNext = false;
    }
    // Must lie between BOTH 'A' and 'Z' inclusive to count as a capital.
    else if (ch >= 'A' && ch <= 'Z') {
      returnString += ' ' + ch;
    }
    else {
      returnString += ch;
    }
  }
  return returnString;
}

Non elegant one liner using regex replaces with functions.
replace 1 - upper case first letter and remove _-
replace 2 - add space between lower case letters and upper case letters
// Converts helloWorld / hello-world / hello_world into "Hello World" in two passes.
var titleCase = (s) => {
  // Pass 1: upper-case a lowercase letter at the start or right after "_" / "-",
  // dropping the separator in the process.
  const capitalized = s.replace(/(^|[_-])([a-z])/g, (whole, separator, letter) => letter.toUpperCase());
  // Pass 2: put a space between a lowercase letter and the capital that follows it.
  return capitalized.replace(/([a-z])([A-Z])/g, (whole, lower, upper) => `${lower} ${upper}`);
};
console.log(titleCase("helloWorld"));
console.log(titleCase("hello-world"));
console.log(titleCase("hello_world"));

If using a library is an option, Lodash's startCase or lowerCase might be useful:
https://lodash.com/docs/#startCase
https://lodash.com/docs/#lowerCase

// Lodash pipeline (requires the global `_`): the input is passed through
// snakeCase(), split on "_", each piece is passed to _.capitalize, and the
// pieces are joined with spaces. value() unwraps the chained result.
const result = _.chain("hello-world")
.snakeCase()
.split("_")
.map(w => _.capitalize(w))
.join(" ")
.value()
console.log(result)
<!-- Loads Lodash 4.17.21 from jsDelivr; npm packages are addressed as name@version. -->
<script src="https://cdn.jsdelivr.net/npm/lodash@4.17.21/lodash.min.js"></script>

Escape quotes while splitting string in javascript [duplicate]

Where could I find some JavaScript code to parse CSV data?

You can use the CSVToArray() function mentioned in this blog entry.
<script type="text/javascript">
// ref: http://stackoverflow.com/a/1293163/2343
/**
 * Parses a delimited string into an array of arrays (rows of fields).
 *
 * Quoted fields may contain the delimiter, line breaks and doubled quotes
 * (""), which are unescaped to a single quote.
 *
 * @param {string} strData - The CSV text.
 * @param {string} [strDelimiter=","] - Single-character field delimiter.
 * @returns {string[][]} One inner array per row. Empty input yields [[""]].
 */
function CSVToArray( strData, strDelimiter ){
    // Default to comma when no delimiter is supplied.
    strDelimiter = (strDelimiter || ",");

    // Each match is: a delimiter (field delimiter, line break, or start of
    // string) followed by either a quoted field or a standard field.
    var objPattern = new RegExp(
        (
            // Delimiters.
            "(\\" + strDelimiter + "|\\r?\\n|\\r|^)" +
            // Quoted fields.
            "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
            // Standard fields.
            "([^\"\\" + strDelimiter + "\\r\\n]*))"
        ),
        "gi"
    );

    // Result rows; starts with one empty row.
    var arrData = [[]];
    var arrMatches = null;

    while ((arrMatches = objPattern.exec( strData )) !== null){
        var strMatchedDelimiter = arrMatches[ 1 ];

        if (strMatchedDelimiter.length && strMatchedDelimiter !== strDelimiter){
            // A non-empty delimiter that is not the field delimiter is a
            // line break: start a new row.
            arrData.push( [] );
        } else if (arrMatches.index === 0 && strMatchedDelimiter === strDelimiter){
            // The text begins with a field delimiter, so the first field is
            // empty. The pattern consumes that delimiter together with the
            // SECOND field, so the empty first field is added by hand.
            arrData[ 0 ].push( "" );
        }

        var strMatchedValue;
        // Group 2 is defined (possibly as "") whenever the quoted alternative
        // matched. Testing truthiness instead would turn an empty quoted
        // field ("") into undefined.
        if (arrMatches[ 2 ] !== undefined){
            strMatchedValue = arrMatches[ 2 ].replace(
                new RegExp( "\"\"", "g" ),
                "\""
            );
        } else {
            // Non-quoted value.
            strMatchedValue = arrMatches[ 3 ];
        }

        arrData[ arrData.length - 1 ].push( strMatchedValue );

        // exec() does not advance lastIndex after a zero-length match (which
        // can happen at the very start of the string); step forward manually
        // so the loop cannot spin forever.
        if (arrMatches[ 0 ].length === 0){
            objPattern.lastIndex++;
        }
    }

    return( arrData );
}
</script>

jQuery-CSV
It's a jQuery plugin designed to work as an end-to-end solution for parsing CSV into JavaScript data. It handles every single edge case presented in RFC 4180, as well as some that pop up for Excel/Google spreadsheet exports (i.e., mostly involving null values) that the specification is missing.
Example:
track,artist,album,year
Dangerous,'Busta Rhymes','When Disaster Strikes',1997
// Calling this
music = $.csv.toArrays(csv)
// Outputs...
[
["track", "artist", "album", "year"],
["Dangerous", "Busta Rhymes", "When Disaster Strikes", "1997"]
]
console.log(music[1][2]) // Outputs: 'When Disaster Strikes'
Update:
Oh yeah, I should also probably mention that it's completely configurable.
music = $.csv.toArrays(csv, {
delimiter: "'", // Sets a custom value delimiter character
separator: ';', // Sets a custom field separator character
});
Update 2:
It now works with jQuery on Node.js too. So you have the option of doing either client-side or server-side parsing with the same library.
Update 3:
Since the Google Code shutdown, jquery-csv has been migrated to GitHub.
Disclaimer: I am also the author of jQuery-CSV.

Here's an extremely simple CSV parser that handles quoted fields with commas, new lines, and escaped double quotation marks. There's no splitting or regular expression. It scans the input string 1-2 characters at a time and builds an array.
Test it at http://jsfiddle.net/vHKYH/.
/**
 * Character-by-character CSV scanner. Returns an array of rows, each an array
 * of field strings. Handles quoted fields containing commas and line breaks,
 * and doubled quotes ("") inside quoted fields.
 */
function parseCSV(str) {
    var rows = [];
    var insideQuotes = false; // true while scanning inside a quoted field
    var r = 0;                // current row index
    var f = 0;                // current field index within the row

    for (var pos = 0; pos < str.length; pos++) {
        var ch = str[pos];
        var following = str[pos + 1];

        // Make sure the current row and field exist before touching them.
        if (!rows[r]) {
            rows[r] = [];
        }
        if (!rows[r][f]) {
            rows[r][f] = '';
        }

        if (ch == '"') {
            if (insideQuotes && following == '"') {
                // Doubled quote inside a quoted field: emit one quote, skip the other.
                rows[r][f] += ch;
                pos++;
            } else {
                // Lone quote: enter or leave a quoted field.
                insideQuotes = !insideQuotes;
            }
            continue;
        }

        if (!insideQuotes) {
            if (ch == ',') {
                // Field separator.
                f++;
                continue;
            }
            if (ch == '\r' || ch == '\n') {
                // Line break (CRLF counts once): start the next row at field 0.
                if (ch == '\r' && following == '\n') {
                    pos++;
                }
                r++;
                f = 0;
                continue;
            }
        }

        // Ordinary character: append to the current field.
        rows[r][f] += ch;
    }
    return rows;
}

I have an implementation as part of a spreadsheet project.
This code is not yet tested thoroughly, but anyone is welcome to use it.
As some of the answers noted though, your implementation can be much simpler if you actually have DSV or TSV file, as they disallow the use of the record and field separators in the values. CSV, on the other hand, can actually have commas and newlines inside a field, which breaks most regular expression and split-based approaches.
// CSV helper with two functions: parse() turns CSV text into a table (array of
// rows, each an array of cell values) and stringify() turns a table back into text.
var CSV = {
// parse(csv, reviver): `reviver(rowIndex, columnIndex, value)` is called for every
// cell and its return value is stored; by default the raw string is kept.
parse: function(csv, reviver) {
reviver = reviver || function(r, c, v) { return v; };
// chars: the input as an array of characters; c: cursor; cc: total length;
// start/end: bounds of the current cell inside chars.
var chars = csv.split(''), c = 0, cc = chars.length, start, end, table = [], row;
while (c < cc) {
// Each pass of the outer loop reads one row.
table.push(row = []);
while (c < cc && '\r' !== chars[c] && '\n' !== chars[c]) {
start = end = c;
if ('"' === chars[c]){
// Quoted cell: its content starts after the opening quote.
start = end = ++c;
while (c < cc) {
if ('"' === chars[c]) {
if ('"' !== chars[c+1]) {
// A quote not followed by another quote closes the cell.
break;
}
else {
// Doubled quote: keep the first, blank out the second.
chars[++c] = ''; // unescape ""
}
}
end = ++c;
}
// Step over the closing quote, if there is one.
if ('"' === chars[c]) {
++c;
}
// Anything between the closing quote and the next comma / line break is skipped.
while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) {
++c;
}
} else {
// Unquoted cell: runs up to the next comma or line break.
while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) {
end = ++c;
}
}
// Row index is table.length-1; column index is the row's length before the push.
row.push(reviver(table.length-1, row.length, chars.slice(start, end).join('')));
if (',' === chars[c]) {
++c;
}
}
// Consume the line terminator: an optional CR followed by an optional LF.
if ('\r' === chars[c]) {
++c;
}
if ('\n' === chars[c]) {
++c;
}
}
return table;
},
// stringify(table, replacer): `replacer(rowIndex, columnIndex, value)` is called for
// every cell before it is written; by default the value is used unchanged.
stringify: function(table, replacer) {
replacer = replacer || function(r, c, v) { return v; };
var csv = '', c, cc, r, rr = table.length, cell;
for (r = 0; r < rr; ++r) {
// Rows are separated by CRLF.
if (r) {
csv += '\r\n';
}
for (c = 0, cc = table[r].length; c < cc; ++c) {
// Cells are separated by commas.
if (c) {
csv += ',';
}
cell = replacer(r, c, table[r][c]);
// Cells containing a comma, CR, LF or quote are wrapped in quotes, with quotes doubled.
// NOTE(review): cell.replace assumes the matching cell is a string - confirm callers.
if (/[,\r\n"]/.test(cell)) {
cell = '"' + cell.replace(/"/g, '""') + '"';
}
// Truthy cells and the number 0 are written; other falsy cells are written as empty.
csv += (cell || 0 === cell) ? cell : '';
}
}
return csv;
}
};

csvToArray v1.3
A compact (645 bytes), but compliant function to convert a CSV string into a 2D array, conforming to the RFC4180 standard.
https://code.google.com/archive/p/csv-to-array/downloads
Common Usage: jQuery
$.ajax({
url: "test.csv",
dataType: 'text',
cache: false
}).done(function(csvAsString){
csvAsArray=csvAsString.csvToArray();
});
Common usage: JavaScript
csvAsArray = csvAsString.csvToArray();
Override field separator
csvAsArray = csvAsString.csvToArray("|");
Override record separator
csvAsArray = csvAsString.csvToArray("", "#");
Override Skip Header
csvAsArray = csvAsString.csvToArray("", "", 1);
Override all
csvAsArray = csvAsString.csvToArray("|", "#", 1);

Here's my PEG(.js) grammar that seems to do ok at RFC 4180 (i.e. it handles the examples at http://en.wikipedia.org/wiki/Comma-separated_values):
// start: optional leading/trailing line breaks around one or more lines
// separated by runs of line breaks; returns the array of lines.
start
= [\n\r]* first:line rest:([\n\r]+ data:line { return data; })* [\n\r]* { rest.unshift(first); return rest; }
// line: one field followed by zero or more ","-prefixed fields; returns the array of fields.
line
= first:field rest:("," text:field { return text; })*
& { return !!first || rest.length; } // ignore blank lines
{ rest.unshift(first); return rest; }
// field: either a double-quoted run of chars, or any run without line breaks and commas.
field
= '"' text:char* '"' { return text.join(''); }
/ text:[^\n\r,]* { return text.join(''); }
// char (inside quotes): a doubled quote stands for one quote; otherwise any non-quote character.
char
= '"' '"' { return '"'; }
/ [^"]
Try it out at http://jsfiddle.net/knvzk/10 or http://pegjs.majda.cz/online. Download the generated parser at https://gist.github.com/3362830.

Here's another solution. This uses:
a coarse global regular expression for splitting the CSV string (which includes surrounding quotes and trailing commas)
fine-grained regular expression for cleaning up the surrounding quotes and trailing commas
also, has type correction differentiating strings, numbers, boolean values and null values
For the following input string:
"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,
The code outputs:
[
"This is, a value",
"Hello",
4,
-123,
3.1415,
"This is also, possible",
true,
null
]
Here's my implementation of parseCSVLine() in a runnable code snippet:
/**
 * Parses one CSV line into typed values.
 *
 * Fields may be double-quoted, single-quoted or bare. Empty fields become
 * null, true/false become booleans, integers and decimals become numbers,
 * everything else stays a (trimmed) string.
 *
 * @param {string} text - A single CSV line.
 * @returns {Array<string|number|boolean|null>} The parsed field values.
 */
function parseCSVLine(text) {
  // Coarse pass: each token is one field including its trailing "," (if any).
  const tokens = text.match(/\s*(\"[^"]*\"|'[^']*'|[^,]*)\s*(,|$)/g) || [];

  // The global match always yields an extra empty token at the end of the
  // string. It represents a real (empty) last field only when the previous
  // token ended with a comma; otherwise it is an artifact and must go, or
  // "a,b" would parse as ["a", "b", null].
  const last = tokens.length - 1;
  if (last > 0 && tokens[last] === '' && !tokens[last - 1].endsWith(',')) {
    tokens.pop();
  }

  // Fine pass: strip quotes / separators and convert types.
  return tokens.map(function (token) {
    let m;
    if (/^\s*,?$/.test(token)) return null; // null value
    if ((m = token.match(/^\s*\"([^"]*)\"\s*,?$/))) return m[1]; // Double Quoted Text
    if ((m = token.match(/^\s*'([^']*)'\s*,?$/))) return m[1]; // Single Quoted Text
    if ((m = token.match(/^\s*(true|false)\s*,?$/))) return m[1] === "true"; // Boolean
    if ((m = token.match(/^\s*((?:\+|\-)?\d+)\s*,?$/))) return parseInt(m[1], 10); // Integer Number
    // At least one digit is required, so a lone "." stays text instead of becoming NaN.
    if ((m = token.match(/^\s*((?:\+|\-)?(?:\d+\.\d*|\.\d+))\s*,?$/))) return parseFloat(m[1]); // Floating Number
    if ((m = token.match(/^\s*(.*?)\s*,?$/))) return m[1]; // Unquoted Text
    return token;
  });
}
let data = `"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,`;
let obj = parseCSVLine(data);
console.log( JSON.stringify( obj, undefined, 2 ) );

Here's my simple vanilla JavaScript code:
let a = 'one,two,"three, but with a comma",four,"five, with ""quotes"" in it.."'
console.log(splitQuotes(a))
/**
 * Splits one CSV line on commas, honouring double-quoted cells.
 *
 * Inside a quoted cell a comma is literal and a doubled quote ("") stands
 * for one quote character. The surrounding quotes are removed.
 *
 * @param {string} line - A single CSV line.
 * @returns {string[]} The cell values, including a trailing empty cell.
 */
function splitQuotes(line) {
  // Fast path: no quotes at all.
  if (line.indexOf('"') < 0) {
    return line.split(',');
  }

  const result = [];
  let cell = '';
  let quote = false; // true while inside a quoted cell

  for (let i = 0; i < line.length; i++) {
    const char = line[i];
    if (char === '"') {
      if (quote && line[i + 1] === '"') {
        // Escaped quote. Only valid INSIDE a quoted cell; outside, `""` is
        // an empty quoted cell and must not produce a quote character.
        cell += char;
        i++;
      } else {
        quote = !quote;
      }
    } else if (!quote && char === ',') {
      result.push(cell);
      cell = '';
    } else {
      cell += char;
    }
  }

  // Always emit the final cell, even when empty, to match line.split(',').
  result.push(cell);
  return result;
}

I'm not sure why I couldn't get Kirtan's example to work for me. It seemed to be failing on empty fields or maybe fields with trailing commas...
This one seems to handle both.
I did not write the parser code, just a wrapper around the parser function to make this work for a file. See attribution.
var Strings = {
  /**
   * Wrapped CSV line parser: splits the text into lines and each line into
   * fields, re-joining pieces of quoted fields that contained the separator.
   * @param s String delimited CSV string
   * @param sep Separator override (defaults to ",")
   * @returns Array of rows, each an array of field strings
   * @attribution: http://www.greywyvern.com/?post=258 (comments closed on blog :( )
   */
  parseCSV : function(s,sep) {
    // http://stackoverflow.com/questions/1155678/javascript-string-newline-character
    var universalNewline = /\r\n|\r|\n/g;
    var a = s.split(universalNewline);
    sep = sep || ",";
    // Index loop: for...in on an array iterates keys as strings and would
    // also visit inherited enumerable properties.
    for (var i = 0; i < a.length; i++) {
      // Walk the raw pieces right-to-left so a piece that ends a quoted field
      // can be merged with the piece(s) before it.
      for (var f = a[i].split(sep), x = f.length - 1, tl; x >= 0; x--) {
        // Ignore whitespace after a closing quote, and index into the TRIMMED
        // text (the original indexed with the untrimmed length, so a field
        // like `"x" ` was never recognised as quoted).
        var trimmedEnd = f[x].replace(/"\s+$/, '"');
        if (trimmedEnd.charAt(trimmedEnd.length - 1) == '"') {
          if ((tl = f[x].replace(/^\s+"/, '"')).length > 1 && tl.charAt(0) == '"') {
            // Complete quoted field: strip the quotes and unescape "".
            f[x] = f[x].replace(/^\s*"|"\s*$/g, '').replace(/""/g, '"');
          } else if (x) {
            // Tail of a quoted field: glue it to the previous piece and retry.
            f.splice(x - 1, 2, [f[x - 1], f[x]].join(sep));
          } else f = f.shift().split(sep).concat(f);
        } else f[x] = f[x].replace(/""/g, '"'); // result was previously discarded
      }
      a[i] = f;
    }
    return a;
  }
}

Regular expressions to the rescue! These few lines of code handle properly quoted fields with embedded commas, quotes, and newlines based on the RFC 4180 standard.
/**
 * Parses CSV text into a grid (array of rows, each an array of cells).
 *
 * Quoted fields may contain the field separator, line breaks and doubled
 * quotes. They are handled by temporarily swapping those characters for
 * control characters, splitting, and then swapping them back.
 *
 * @param {string} data - CSV text.
 * @param {string} [fieldSep=','] - Field separator.
 * @param {string} [newLine='\n'] - Text substituted for line breaks that
 *   occur inside quoted fields.
 * @returns {string[][]} The parsed grid.
 */
function parseCsv(data, fieldSep, newLine) {
  fieldSep = fieldSep || ',';
  newLine = newLine || '\n';
  // Placeholders for newline, escaped quote and separator inside quoted fields.
  var nSep = '\x1D';
  var qSep = '\x1E';
  var cSep = '\x1F';
  var nSepRe = new RegExp(nSep, 'g');
  var qSepRe = new RegExp(qSep, 'g');
  var cSepRe = new RegExp(cSep, 'g');
  // Escape the separator so characters such as "|" or "." are taken literally.
  var sepPattern = fieldSep.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  var sepRe = new RegExp(sepPattern, 'g');
  // A quoted field: a quote right after start / separator / newline, lazily
  // up to a quote that is followed by end / separator / newline.
  var fieldRe = new RegExp('(?<=(^|' + sepPattern + '|\\n))"(|[\\s\\S]+?(?<![^"]"))"(?=($|' + sepPattern + '|\\n))', 'g');
  var grid = [];
  data.replace(/\r/g, '').replace(/\n+$/, '').replace(fieldRe, function(match, p1, p2) {
    // Protect the ACTUAL separator (the original always protected ",", so
    // any other separator inside a quoted field split the field apart).
    return p2.replace(/\n/g, nSep).replace(/""/g, qSep).replace(sepRe, cSep);
  }).split(/\n/).forEach(function(line) {
    var row = line.split(fieldSep).map(function(cell) {
      return cell.replace(nSepRe, newLine).replace(qSepRe, '"').replace(cSepRe, function() {
        // Function form so a separator such as "$" is inserted literally.
        return fieldSep;
      });
    });
    grid.push(row);
  });
  return grid;
}
const csv = 'A1,B1,C1\n"A ""2""","B, 2","C\n2"';
const separator = ','; // field separator, default: ','
const newline = ' <br /> '; // newline representation in case a field contains newlines, default: '\n'
var grid = parseCsv(csv, separator, newline);
// expected: [ [ 'A1', 'B1', 'C1' ], [ 'A "2"', 'B, 2', 'C <br /> 2' ] ]
You don't need a parser-generator such as lex/yacc. The regular expression handles RFC 4180 properly thanks to positive lookbehind, negative lookbehind, and positive lookahead.
Clone/download code at https://github.com/peterthoeny/parse-csv-js

Just throwing this out there.. I recently ran into the need to parse CSV columns with Javascript, and I opted for my own simple solution. It works for my needs, and may help someone else.
const csvString = '"Some text, some text",,"",true,false,"more text","more,text, more, text ",true';
/**
 * Minimal CSV parser: splits the text into lines and each line on commas,
 * re-joining the pieces of a double-quoted column that contained commas.
 * Quotes are kept in the returned values; blank lines are skipped.
 *
 * @param {string} text - CSV text with "\n" line breaks.
 * @returns {string[][]} One array of column strings per non-empty line.
 */
const parseCSV = (text) => {
  const output = [];
  for (const rawLine of text.split('\n')) {
    const line = rawLine.trim();
    if (line.length === 0) continue;

    const columns = line.split(',');
    const row = [];
    for (let index = 0; index < columns.length; index++) {
      let item = columns[index];
      if (item.startsWith('"') && !item.endsWith('"')) {
        // Opening piece of a quoted column: keep appending pieces until one
        // ends with a quote. The bounds check stops at the end of the line,
        // so an unterminated quote no longer throws on `undefined`.
        while (index + 1 < columns.length) {
          index++;
          item += `,${columns[index]}`;
          if (columns[index].endsWith('"')) break;
        }
      }
      row.push(item);
    }
    output.push(row);
  }
  return output;
};
console.log(parseCSV(csvString));

Personally, I like to use the Deno std library, since most of its modules are officially compatible with the browser.
The problem is that std is written in TypeScript; an official solution might arrive in the future: https://github.com/denoland/deno_std/issues/641 https://github.com/denoland/dotland/issues/1728
For now there is an actively maintained on-the-fly transpiler: https://bundle.deno.dev/
So you can simply use it like this:
<!-- Imports the Deno std CSV parser through the bundle.deno.dev transpiler and logs the parsed rows. -->
<script type="module">
  // The std version is pinned with "@"; a "#" here would start a URL fragment
  // and cut the module path off the request.
  import { parse } from "https://bundle.deno.dev/https://deno.land/std@0.126.0/encoding/csv.ts";
  console.log(await parse("a,b,c\n1,2,3"));
</script>

I have written this JavaScript code to parse a CSV string into an array of rows. I find it better to break the whole CSV down into lines and fields and process each of them accordingly. I think this will make it easy for you to change the code to suit your needs.
//
//
// CSV to object
//
// Shared constants for the CSV parsing functions below.
// new_line_char: the line terminator parse_csv scans for (CRLF input is
// converted to this character before scanning).
const new_line_char = '\n';
// field_separator_char: the character parse_csv_line treats as the boundary
// between fields when it is outside double quotes.
const field_separator_char = ',';
/**
 * Parse a CSV string into an array of rows.
 *
 * The text is cut into lines at every newline that is outside double quotes,
 * and each line is handed to parse_csv_line. Input that contains "\r\n" is
 * normalised to "\n" for scanning, and the "\r\n" sequences are put back into
 * the returned field values afterwards.
 *
 * A newline at the very start of the input is skipped rather than being glued
 * onto the first field. NOTE: if a double quote is never closed, the text from
 * that line onwards is not emitted.
 *
 * @param {string} csv_str - The CSV text.
 * @returns {Array} One entry per line, each being the result of parse_csv_line.
 */
function parse_csv(csv_str) {
  const uses_crlf = get_new_line_char(csv_str) === '\r\n';

  // Normalise before scanning so that every character inspected below comes
  // from the same string that is later sliced.
  let text = uses_crlf ? csv_str.split('\r\n').join(new_line_char) : csv_str;

  // Make sure the final line is terminated so the loop emits it.
  if (text[text.length - 1] !== new_line_char) {
    text += new_line_char;
  }

  const rows = [];
  let line_start_index = 0;
  let in_quote = false;
  for (let csr_index = 0; csr_index < text.length; csr_index++) {
    const cursor_val = text[csr_index];
    if (cursor_val === '"') {
      in_quote = !in_quote;
    } else if (cursor_val === new_line_char && !in_quote) {
      // A newline at index 0 has no line in front of it: skip it, but still
      // move the start marker so it does not leak into the first field.
      if (csr_index > 0) {
        rows.push(parse_csv_line(text.substring(line_start_index, csr_index)));
      }
      line_start_index = csr_index + 1;
    }
  }

  if (!uses_crlf) {
    return rows;
  }
  // Restore the original CRLF line breaks inside field values.
  return rows.map((row) => row.map((field) => field.split(new_line_char).join('\r\n')));
}
/**
 * Split one CSV line into its fields.
 *
 * The line is cut at every field_separator_char that is outside double
 * quotes, and each slice is passed through parse_csv_field.
 *
 * NOTE: if a double quote is never closed, the field containing it (and
 * anything after it) is not emitted.
 *
 * @param {string} csv_line_str - A single line, without its line terminator.
 * @returns {Array} The parsed fields; an empty line yields an empty array.
 */
function parse_csv_line(csv_line_str) {
  // An empty line has no fields.
  if (csv_line_str.length === 0) {
    return [];
  }

  // Append a separator as a sentinel so the last field is flushed by the
  // same branch as every other field.
  const line = csv_line_str + field_separator_char;

  const fields = [];
  let field_start_index = 0;
  let in_quote = false;
  for (let csr_index = 0; csr_index < line.length; csr_index++) {
    const cursor_val = line[csr_index];
    if (cursor_val === '"') {
      in_quote = !in_quote;
    } else if (cursor_val === field_separator_char && !in_quote) {
      fields.push(parse_csv_field(line.substring(field_start_index, csr_index)));
      field_start_index = csr_index + 1;
    }
  }
  return fields;
}
/**
 * Decode a single raw CSV field.
 *
 * A field that starts with a double quote has its first and last characters
 * removed and every doubled quote ("") collapsed to a single quote. Any other
 * field is returned unchanged.
 *
 * @param {string} csv_field_str - The raw field text.
 * @returns {string} The decoded field value.
 */
function parse_csv_field(csv_field_str) {
  const with_quote = csv_field_str[0] === '"';
  if (!with_quote) {
    return csv_field_str;
  }
  // Remove the start and end quotes, then unescape doubled quotes.
  const inner = csv_field_str.substring(1, csv_field_str.length - 1);
  return inner.split('""').join('"');
}
/**
 * Report which line terminator the CSV text uses.
 *
 * @param {string} csv_str - The CSV text to inspect.
 * @returns {string} '\r\n' when that sequence occurs anywhere in the text, otherwise '\n'.
 */
function get_new_line_char(csv_str) {
  const has_crlf = csv_str.indexOf('\r\n') !== -1;
  return has_crlf ? '\r\n' : '\n';
}

Just use .split(','):
// Break the sentence into its space-separated words.
const str = "How are you doing today?";
const n = str.split(" ");

How to get the next letter of the alphabet in Javascript?

I am building an autocomplete that searches off of a CouchDB View.
I need to be able to take the final character of the input string, and replace the last character with the next letter of the english alphabet. (No need for i18n here)
For Example:
Input String = "b"
startkey = "b"
endkey = "c"
OR
Input String = "foo"
startkey = "foo"
endkey = "fop"
(in case you're wondering, I'm making sure to include the option inclusive_end=false so that this extra character doesn't taint my resultset)
The Question
Is there a function natively in Javascript that can just get the next letter of the alphabet?
Or will I just need to suck it up and do my own fancy function with a base string like "abc...xyz" and indexOf()?

// Everything except the last character of my_string, followed by the
// character whose code unit is one higher than that last character's.
my_string.slice(0, my_string.length - 1) +
  String.fromCharCode(my_string.charCodeAt(my_string.length - 1) + 1)

/**
 * Advance the last letter of a string to the next letter of the alphabet.
 *
 * The last ASCII letter together with any non-letter characters after it is
 * replaced by the following letter; 'Z' wraps to 'A' and 'z' wraps to 'a'.
 * A string with no letters is returned unchanged.
 *
 * @param {string} s - The input string.
 * @returns {string} The string with its last letter advanced.
 */
function nextLetter(s) {
  const lastLetterAndTail = /([a-zA-Z])[^a-zA-Z]*$/;
  return s.replace(lastLetterAndTail, (matched) => {
    const code = matched.charCodeAt(0);
    if (code === 90) {
      return 'A'; // 'Z'
    }
    if (code === 122) {
      return 'a'; // 'z'
    }
    return String.fromCharCode(code + 1);
  });
}

A more comprehensive solution, which gets the next letter according to how MS Excel numbers its columns... A B C ... Y Z AA AB ... AZ BA ... ZZ AAA
This works with small letters, but you can easily extend it for caps too.
/**
 * Return the key that follows `key` in spreadsheet-column order
 * (A, B, ..., Z, AA, AB, ..., AZ, BA, ..., ZZ, AAA).
 *
 * The casing of each character is kept: 'z' wraps to 'a' and 'Z' to 'A'.
 * Declared as a function so that it does not rely on an implicit global
 * assignment, which throws in strict mode and ES modules.
 *
 * @param {string} key - Current key; expected to be a non-empty string of letters.
 * @returns {string} The next key.
 */
function getNextKey(key) {
  // A lone Z/z rolls over to a two-letter key: AA or aa.
  if (key === 'Z' || key === 'z') {
    const first = String.fromCharCode(key.charCodeAt(0) - 25);
    return first + first;
  }

  const lastChar = key.slice(-1);
  const sub = key.slice(0, -1);
  if (lastChar === 'Z' || lastChar === 'z') {
    // Carry: increment everything before the trailing Z/z, then append A/a
    // (matching the casing of the character that wrapped).
    return getNextKey(sub) + String.fromCharCode(lastChar.charCodeAt(0) - 25);
  }
  // No carry needed: just bump the last character.
  return sub + String.fromCharCode(lastChar.charCodeAt(0) + 1);
}

Here is a function that does the same thing (except for upper case only, but that's easy to change) but uses slice only once and is iterative rather than recursive. In a quick benchmark, it's about 4 times faster (which is only relevant if you make really heavy use of it!).
/**
 * Return the upper-case key that follows `str` in spreadsheet-column order
 * (A, B, ..., Z, AA, AB, ...).
 *
 * @param {string} str - Current key; an empty or missing value yields 'A'.
 * @returns {string} The next key.
 */
function nextString(str) {
  if (!str) {
    return 'A';
  }

  // Walk left over the trailing run of 'Z' characters, never past index 0.
  let pivot = str.length - 1;
  while (pivot > 0 && str[pivot] === 'Z') {
    pivot -= 1;
  }

  // Every 'Z' that was stepped over becomes an 'A'.
  const carried = 'A'.repeat(str.length - 1 - pivot);
  const pivotChar = str[pivot];

  if (pivotChar === 'Z') {
    // The whole string consisted of 'Z': the result is one character longer.
    return 'AA' + carried;
  }
  const bumped = String.fromCharCode(pivotChar.charCodeAt(0) + 1);
  return str.slice(0, pivot) + bumped + carried;
}

Just to explain the main part of the code that Bipul Yadav wrote (can't comment yet due to lack of reps). Without considering the loop, and just taking the char "a" as an example:
"a".charCodeAt(0) = 97...hence "a".charCodeAt(0) + 1 = 98 and String.fromCharCode(98) = "b"...so the following function for any letter will return the next letter in the alphabet:
/**
 * Return the letter that follows `letter`, wrapping "z" to "a" and "Z" to "A".
 *
 * @param {string} letter - A single character.
 * @returns {string} The character with the next char code, or the wrapped letter.
 */
function nextLetterInAlphabet(letter) {
  // Loose equality is kept on purpose so the comparison behaves exactly as before.
  if (letter == "z") {
    return "a";
  }
  if (letter == "Z") {
    return "A";
  }
  const nextCode = letter.charCodeAt(0) + 1;
  return String.fromCharCode(nextCode);
}

// Shift every character of `input` to the character with the next char code
// and log the shifted string.
const input = "Hello";
const result = input
  .split("")
  .map((ch) => String.fromCharCode(ch.charCodeAt(0) + 1))
  .join("");
console.log(result);

I understand the original question was about moving the last letter of the string forward to the next letter. But I came to this question more interested personally in changing all the letters in the string, then being able to undo that. So I took the code written by Bipul Yadav and I added some more code. The below code takes a series of letters, increments each of them to the next letter maintaining case (and enables Zz to become Aa), then rolls them back to the previous letter (and allows Aa to go back to Zz).
// Demo: move every character of a string forward by one char code (wrapping
// Z -> A and z -> a), then move the result back by one (wrapping A -> Z and
// a -> z) to recover the starting value.
let inputValue = "AaZzHello";
console.log(`starting value=[${inputValue}]`);

let resultFromIncrementing = "";
for (let i = 0; i < inputValue.length; i++) {
  let curr = String.fromCharCode(inputValue.charCodeAt(i) + 1);
  if (curr === "[") curr = "A"; // "[" is the character right after "Z"
  if (curr === "{") curr = "a"; // "{" is the character right after "z"
  resultFromIncrementing += curr;
}
console.log(`resultFromIncrementing=[${resultFromIncrementing}]`);

inputValue = resultFromIncrementing;
let resultFromDecrementing = "";
for (let i = 0; i < inputValue.length; i++) {
  let curr = String.fromCharCode(inputValue.charCodeAt(i) - 1);
  if (curr === "@") curr = "Z"; // "@" is the character right before "A"
  if (curr === "`") curr = "z"; // "`" is the character right before "a"
  resultFromDecrementing += curr;
}
console.log(`resultFromDecrementing=[${resultFromDecrementing}]`);
The output of this is:
starting value=[AaZzHello]
resultFromIncrementing=[BbAaIfmmp]
resultFromDecrementing=[AaZzHello]

We Keep Coding

JavaScript is the programming language of the Web.

Split string by spaces and double quotes in Javascript - javascript

I believe this should satisfy your requirements:- ((^|\s)".*?(\w|$)")

Related

Does the input string have a . or ? or ' or space?

How to match PHP's explode(';',$s,3) to s.split(';',3) in JavaScript?

Convert camel case to human readable string?

Escape quotes while splitting string in javascript [duplicate]

How to get the next letter of the alphabet in Javascript?

Categories

Resources