Help parsing string (City, State Zip) with JavaScript - javascript

I've got a string with the following format:
City, State ZIP
I'd like to get City and State from this string.
How can I do that with JavaScript? edit: note that he doesn't mention he already has the zip code when he gets here, if that helps you in your solution ~~ drachenstern

var address = "San Francisco, CA 94129";
function parseAddress(address) {
// Make sure the address is a string.
if (typeof address !== "string") throw "Address is not a string.";
// Trim the address.
address = address.trim();
// Make an object to contain the data.
var returned = {};
// Find the comma.
var comma = address.indexOf(',');
// Pull out the city.
returned.city = address.slice(0, comma);
// Get everything after the city.
var after = address.substring(comma + 2); // The string after the comma, +2 so that we skip the comma and the space.
// Find the space.
var space = after.lastIndexOf(' ');
// Pull out the state.
returned.state = after.slice(0, space);
// Pull out the zip code.
returned.zip = after.substring(space + 1);
// Return the data.
return returned;
}
address = parseAddress(address);
This is probably better then using regular expressions and String.split(), as it takes into account that the state and city may have spaces.
EDIT: Bug fix: It only included the first word of multi-word state names.
And here's a minified version. :D
function parseAddress(a) {if(typeof a!=="string") throw "Address is not a string.";a=a.trim();var r={},c=a.indexOf(',');r.city=a.slice(0,c);var f=a.substring(c+2),s=f.lastIndexOf(' ');r.state=f.slice(0,s);r.zip=f.substring(s+1);return r;}

There are many ways to do this. Here's a very naive one:
var parts = "City, State ZIP".split(/\s+/); // split on whitespace
var city = parts[0].slice(0, parts[0].length - 1); // remove trailing comma
var state = parts[1];
var zip = parts[2];
Here's one that accounts for the presence of spaces in either the city or state or both:
var parts = "san fran bay, new mex state 666666".split(/\s+|,/),
partition = parts.indexOf(""),
city = parts.slice(0, partition).join(" "),
state = parts.slice(partition + 1, -1).join(" "),
zip = parts.pop();
This last one only works if you're lucky enough to be in an environment that supports destructuring assignment:
var city, statezip, state, zip, parts;
[city, statezip] = "Spaced City, New Mexico ZIP".split(/,\s*/);
parts = statezip.split(/\s+/);
zip = parts.pop();
state = parts.join(" ");
None of these perform any validation, of course.

Ok, since advising regex isn't good, here's my solution. It takes into account cities that have spaces in them, which the other responses here don't seem to do:
var str = "New York, NY 20101";
var cityAndRest = str.split(',');
var city = cityAndRest[0];
var stateAndZip = cityAndRest[1].trim().split(' ');
var state = stateAndZip[0];
var zip = stateAndZip[1];

First assumption: American addresses only.
First find out if the last 5 or the last 10 characters are numeric. A simpler test is to see if the last character is numeric. If so, it's probably got the zip code included. Then a simple test to see if the last 10 contains a space (city #####) or if the last ten include a dash (12345-6789) to figure out if it's a 5 or 5+4 zip. We'll test for a hyphen and no space. (city-du-lac 12345 captures -lac 12345)
Next, all addresses split the city and state by a comma, so we want the last comma. Find the index of the last comma, and split there. I don't know of a city that uses commas in it's name, and I'm sure not gonna let my parser burst on an unknown if I can help it. I do ignore the fact that Washington DC could also be Washington, DC. I figure edge cases are for libraries, not one off scripts.
Lastly, trim everything that remains to remove trailing or leading spaces.
function IsNumeric(n) {
return !isNaN(parseFloat(n)) && isFinite(n);
}
var addr = 'New York City, New York 10101';
//var addr = 'San Bernadino, CA 11111';
function getCityStateZip(addr){
var city; var state;var zip;
city = ''; state = ''; zip = '';
var addrLen = addr.length;
if ( IsNumeric( addr.substring(addrLen - 1) ) ) {
//contains a zipcode - just a sanity check
//get last 10 characters for testing easily
var lastTen = addr.substring( addrLen - 10 );
if ( lastTen.indexOf('-') > 0 && ( lastTen.indexOf(' ') == -1 ) ) {
//found a hyphen and no space (matches our complex rule for zipcodes)
zip = lastTen;
} else {
zip = addr.substring( addrLen - 5 ); //assume a basic 5 zip code
}
}
var zipLen = zip.length;
addrLen = addrLen - zipLen - 1;
addr = addr.substring(0, addrLen ); //remove the chars we just moved into zip
var lastComma = addr.lastIndexOf(',');
if ( lastComma == -1 ) {
//you have a problem, how do you want to handle it?
}
city = addr.substring(0,lastComma); //skip the comma itself, yes?
state = addr.substring(lastComma + 2);
return { 'city':city,'state': state,'zip': zip};
}
getCityStateZip(addr)
IsNumeric js function can be found here Validate decimal numbers in JavaScript - IsNumeric()

Easy way but no validation:
var addrObj={};
parseAddress("Beverly Hills, CA 90210",addrObj);
function parseAddress(address, addrObj){
var arr=address.replace(","," ").split(" ");
addrObj.zip=arr.pop();
addrObj.state=arr.pop();
addrObj.city=arr.join(" ");
}

For this type of thing you might want to use JavaScripts RegEx functions.
Here's some info:
http://www.javascriptkit.com/javatutors/re.shtml

Related

Google App Script Regex exec() returns null only in one function

I am writing a Google Apps script to create a calendar event based on automated emails I receive for jobs. I am using regex expressions to extract information that I need to populate the event in Google Calendar. So far, I have everything functioning as expected except for one function, getEndTime(), which should find the end time of the job, but presently returns null any time it's called. All of my other functions using exec() work fine.
I have read many other questions regarding exec() returning null and have fixed common issues, such as removing the 'g' tag and resetting the lastIndex to 0 before calling exec(). I have also checked my regex expression using regex101.com with the Javascript option, which shows the match that I expect for my text.
My regex expression that works on regex101, but not in my code is:
/(Substitute\s+Report\s+Times:\s+[0-9_ ]*:[0-9_ ]*\s+[A-Z_ ]*\s+-\s+)([0-9_ ]*:[0-9_ ]*\s+(AM|PM))(\r|\n)/
My code is:
function findJobs() {
//Searches Gmail for substitute jobs and creates an event on the calendar
//Gets emails with 'NewJobs' label
var label = GmailApp.getUserLabelByName("NewJobs");
var threads = label.getThreads();
for (var i = 0; i < threads.length; i++){
var messages = threads[i].getMessages();
Logger.log("Thread " + i);
for (var j = 0; j < messages.length; j++) {
Logger.log("Message " + j);
//gets email body in plain text
var body = messages[j].getPlainBody();
Logger.log("Getting body..." + j);
//gets school name
var school = getSchool(body);
Logger.log(school);
//gets start time
var starttime = getStartTime(body);
Logger.log(starttime);
//gets end time
var endtime = getEndTime(body);
Logger.log(endtime);
//gets teacher name
var teacher = getTeacher(body);
Logger.log(teacher);
//gets school address
var address = getLocation(body);
Logger.log(address);
//gets date
var startdate = getDate(body);
Logger.log(startdate);
CalendarApp.getDefaultCalendar().createEvent("Subbing - " + school, new Date(startdate + " " + starttime), new Date(startdate + " " + endtime), {location: address, description: teacher});
//threads[j].removeLabel(label);
}
}
Logger.log("--Done--");
}
function getSchool(text){
//Gets the school name from an assignment email
//Regular expression for school name
var regex = /(School\s+:\s+)([a-zA-Z0-9_ ]*)(\r|\n)/;
regex.lastIndex = 0;
var match = regex.exec(text)[2];
return match;
}
function getDate(text){
//Gets the start date from an assignment email
//Regular expression for start date
var regex = /(Date:\s+)([0-9_ ]*\/[0-9_ ]*\/[0-9_ ]*)(\r|\n)/;
regex.lastIndex = 0;
var match = regex.exec(text)[2];
return match;
}
function getStartTime(text){
//Gets the start time from an assignment email
//Regular expression for start time
var regex = /(Substitute\s+Report\s+Times:\s+)([0-9_ ]*:[0-9_ ]*\s+(AM|PM))/;
regex.lastIndex = 0;
var match = regex.exec(text)[2];
return match;
}
function getEndTime(text){
//Gets the end time from an assignment email
//Regular expression for end time
var regex = /(Substitute\s+Report\s+Times:\s+[0-9_ ]*:[0-9_ ]*\s+[A-Z_ ]*\s+-\s+)([0-9_ ]*:[0-9_ ]*\s+(AM|PM))(\r|\n)/;
regex.lastIndex = 0;
Logger.log("End Time reset index...");
var match = regex.exec(text)[2];
Logger.log("End Time exec...");
return match;
}
function getTeacher(text){
//Gets the teacher name from an assignment email
//Regular expression for teacher name
var regex = /(Teacher\s+:\s+)([a-zA-Z0-9_ ]*,[a-zA-Z0-9_ ]*)(\r|\n)/;
regex.lastIndex = 0;
var match = regex.exec(text)[2];
return match;
}
function getLocation(text){
//Gets the location from an assignment email
//Regular expression for location
var regex = /(Address:\s+)(.*)(\r|\n)/;
regex.lastIndex = 0;
var match = regex.exec(text)[2];
return match;
}
Here is an typical email I receive:
You have been assigned as a substitute for a job starting on 9/21/2017.
The following are the details of the job:
*************
Job Summary
*************
Starting On : 9/21/2017
School : School Site
Title : Pre School Teacher
Teacher : Name, Teacher
Substitute : Name, Substitute
Confirmation # : 123456
**********
Job Days
**********
School
---------------------------------------
School Site
Date: 9/21/2017
Employee Times: 8:00 AM - 3:30 PM
Substitute Report Times: 8:00 AM - 3:30 PM
***********************************
School Contact Information
***********************************
School Site
-----------------------------------------------------------
Address: 123 Main Ave Anytown , USA 555555
Phone: 5555555555
-----------------------------------------------------------
**********************
Special Instructions
**********************
Please do not reply to this system generated message. If you need help or have additional questions, please send an email to abc#abc.com
Thank you for using the substitute assignment system. Powered by Aesop
The pattern you're using seems overly complicated. I can't say for sure what's causing it to fail, but my guess would be the (\r|\n) at the end (note that you can just type [\r\n] instead if you really want to do this).
Give this pattern a try:
Substitute Report Times:.+ - (\d{1,2}:\d{1,2} [AP]M)
This assumes that the end time is always preceded by a hyphen and a space, which looks to be the case from the sample text you provided.

Mask a portion of a String using RegExp

I'm trying to mask a portion of a string using JavaScript.
e.g. Mask second and third segment of credit-card number like this using regex:
4567 6365 7987 3783 → 4567 **** **** 3783
3457 732837 82372 → 3457 ****** 82372
I just want to keep the first 4 numbers and the last 5 characters.
This is my first attempt: /(?!^.*)[^a-zA-Z\s](?=.{5})/g
https://regex101.com/r/ZBi54c/2
You can try this:
var cardnumber = '4567 6365 7987 3783';
var first4 = cardnumber.substring(0, 4);
var last5 = cardnumber.substring(cardnumber.length - 5);
mask = cardnumber.substring(4, cardnumber.length - 5).replace(/\d/g,"*");
console.log(first4 + mask + last5);
You could slice the first four digits and apply a replacement for the rest.
console.log(
['4567 6365 7987 3783', '3457 732837 82372'].map(
s => s.slice(0, 4) + s.slice(4).replace(/\d(?=.* )/g, '*')
)
);
The answer apparently satisfies the OP. Here is another solution using only Regexes:
function starry(match, gr1, gr2, gr3) {
var stars = gr2.replace(/\d/g, '*');
return gr1 + " " + stars + " " + gr3;
}
function ccStarry(str) {
var rex = /(\d{4})\s(\d{4}\s\d{4}|\d{6})\s(\d{4}|\d{5})/;
if (rex.test(str))
return str.replace(rex, starry);
else return "";
}
var s1 = "4567 6365 7987 3783";
var s2 = "3457 732837 82372";
var s3 = "dfdfdf";
console.log(ccStarry(s1));
console.log(ccStarry(s2));
console.log(ccStarry(s3));
This ensures that the pattern matches before trying any replacements. For example, in the third test case, it returns an empty string. The pattern can be updated to match other credit card patterns besides the ones given in the question.
I would like to elaborate more on the answer from #Nina Scholz, I use .slice() in the following sample code for masking the variable in 2 condition.
Just a simple variable var n = '12345567890'
Array object
// Single number
var n = '601115558888';
var singleNumber = n.slice(0, 4) + n.slice(4, n.length -4).replace(/\d/g,'*') + n.slice(n.length -4);
console.log(singleNumber);
// array of object
var obj = [{
contacts_name: 'Jason',
contacts_num : '651231239991'
},
{
contacts_name: 'King',
contacts_num : '60101233321'
}];
// Mask for the middle number, showing the first4 number and last4 number
// and replace the rest number with *
var num = obj.map((element, index) =>
element.contacts_num.slice(0,4)
+ element.contacts_num.slice(4, element.contacts_num.length-4).replace(/\d/g, '*')
+ element.contacts_num.slice(element.contacts_num.length -4)
);
console.log(num);
If it's JavaScript doing the regex masking, you've already failed because JS should never need to know the original card number, except when you've just received it from the user and are sending it to the server for the first time, in which case you shouldn't be masking it anyway so the user can check for typos.
I can't really help you there, you've already failed in the worst way.
Server-side, if the number is already broken into spaces*, then one option is: (in PHP but the same idea applies to all)
$parts = explode(" ",$fullnumber);
$first = array_shift($parts);
$last = array_pop($parts);
$middle = implode(" ",$parts);
$mask = preg_replace("/\d/","*",$middle);
$result = "$first $mask $last";
* it shouldn't be

Find the index of a string in Javascript with help of first three characters

I have numerous tsv files each with header row. Now one column name in header row is age. In few files, column name is age while in other files it has EOL charcter such as \r \n.
Now how can i use str.indexOf('age') function so that i get index of age irrespective of column name age with EOL character such as \n , \r etc..
Foe eg:
tsv file1:
Name Address Age Ph_Number
file 2:
Name Address Age/r
file 3:
Name Address Age\n
I am trying to find index of age column in each files header row.
However when i do-
header.indexOf('age')
it gives me result only in case of file1 because in other 2 files we have age as age\r and age\n..
My question is how should i find index of age irrespective of \r \n character along with age in header row.
i have following script now:
var headers = rows[0].split('\t');
if (file.name === 'subjects.tsv'){
for (var i = 0; i < rows.length; i++) {
var ageIdColumn = headers.indexOf("age");
console.log(headers)
As I stated in the comments, indexOf() returns the starting position of the string. It doesn't matter what comes after it:
var csvFile1 = 'column1,column2,column3,age,c1r1';
var csvFile2 = 'column1,column2,column3,age\r,c1r1';
var csvFile3 = 'column1,column2,column3,age\n,c1r1';
console.log(csvFile1.indexOf("age"));
console.log(csvFile2.indexOf("age"));
console.log(csvFile3.indexOf("age"));
If you specifically want to find the versions with the special characters, just look for them explicitly:
var csvFile4 = 'column1,age\r,column2,column3,age\n,c1r1';
console.log(csvFile4.indexOf("age\r"));
console.log(csvFile4.indexOf("age\n"));
Lastly, it may be that you are confused as to what, exactly indexOf() is supposed to do. It is not supposed to tell you where all occurrences of a given string are. It stops looking after the first match. To get all the locations, you'd need a loop similar to this:
var csvFile5 = 'column1,age\r,column2,age, column3,age\n,c1r1';
var results = []; // Found indexes will be stored here.
var pos = null; // Stores the last index position where "age" was found
while (pos !== -1){
// store the index where "age" is found
// If pos is not null, then we've already found age earlier and we
// need to start looking for the next occurence 3 characters after
// where we found it last time. If pos is null, we haven't found it
// yet and need to start from the beginning.
pos = csvFile5.indexOf("age", pos != null ? pos + 3 : pos );
pos !== -1 ? results.push(pos) : "";
}
// All the positions where "age" was in the string (irrespective of what follows it)
// are recorded in the array:
console.log(results);

Find two numbers in a string

This is a follow on from my previous question which can be found here
Link For Previous Question
I am posting a new question as the answer I got was correct, however my next question is how to take it a step further
Basically I have a string of data, within this data somewhere there will be the following;
Width = 70
Void = 40
The actual numbers there could be anything between 1-440.
From my previous question I found how to identify those two digits using regular expression and put them into separate fields, however, my issue now is that the string could contain for example
Part Number = 2353
Length = 3.3mm
Width = 70
Void = 35
Discount = 40%
My question is;
How do I identify only the Width + Void and put them into two separate fields, the answer in my previous question would not solve this issue as what would happen is in this example I would have an array of size 4 and I would simply select the 2nd and 3rd space.
This is not suitable for my issue as the length of array could vary from string to string therefore I need a way of identifying specifically
Width = ##
Void = ##
And from there be able to retrieve the digits individually to put into my separate fields
I am using JavaScript in CRM Dynamics
A simpler option is to convert the whole string into an object and get what you need from that object.
str = "Part Number = 2353\n" +
"Length = 3.3mm\n" +
"Width = 70\n" +
"Void = 35\n" +
"Discount = 40%\n";
data = {};
str.replace(/^(.+?)\s*=\s*(.+)$/gm, function(_, $1, $2) {
data[$1] = $2;
});
alert(data['Width']);
Width\s+=\s+(\d+)|Void\s+=\s+(\d+)
You can try this.Grab the capture.See demo.
http://regex101.com/r/oE6jJ1/31
var re = /Width\s+=\s+(\d+)|Void\s+=\s+(\d+)/igm;
var str = 'Part Number = 2353\n\nLength = 3.3mm\n\nWidth = 70\n\nVoid = 35\n\nDiscount = 40%';
var m;
while ((m = re.exec(str)) != null) {
if (m.index === re.lastIndex) {
re.lastIndex++;
}
// View your result using the m-variable.
// eg m[0] etc.
}
You can use this regex for matching input with Width and Void in any order:
/(\b(Width|Void) += *(\d+)\b)/
RegEx Demo
Your variable names and values are available in captured groups.

Address String Split in javascript

Ok folks I have bombed around for a few days trying to find a good solution for this one.
What I have is two possible address formats.
28 Main St Somecity, NY 12345-6789
or
Main St Somecity, Ny 12345-6789
What I need to do Is split both strings down into an array structured as such
address[0] = HousNumber
address[1] = Street
address[2] = City
address[3] = State
address[4] = ZipCode
My major problem is how to account for the lack of a house number. with out having the whole array shift the data up one.
address[0] = Street
address[1] = City
address[2] = State
address[3] = ZipCode
[Edit]
For those that are wondering this is what i am doing atm . (cleaner version)
place = response.Placemark[0];
point = new GLatLng(place.Point.coordinates[1],place.Point.coordinates[0]);
FCmap.setCenter(point,12);
var a = place.address.split(',');
var e = a[2].split(" ");
var x = a[0].split(" ");
var hn = x.filter(function(item,index){
return index == 0;
});
var st = x.filter(function(item,index){
return index != 0;
});
var street = '';
st.each(function(item,index){street += item + ' ';});
results[0] = new Hash({
FullAddie: place.address,
HouseNum: hn[0],
Dir: '',
Street: street,
City: a[1],
State: e[1],
ZipCode: e[2],
GPoint: new GMarker(point),
Lat: place.Point.coordinates[1],
Lng: place.Point.coordinates[0]
});
// End Address Splitting
Reverse the string, do the split and then reverse each item.
Update: From the snippet you posted, it seems to me that you get the address from a Google GClientGeocoder Placemark. If that is correct, why are you getting the unstructured address (Placemark.address) instead of the structured one (Placemark.AddressDetails)? This would make your life easier, as you would have to try and parse only the ThoroughfareName, which is the street level part of the address, instead of having to parse everything else as well.
function get_address (addr_str) {
var m = /^(\d*)\s*([-\s\w.]+\s(?:St|Rd|Ave)\.?)\s+([-\s\w\.]+),\s*(\w+)\s+([-\d]+)$/i.exec(s);
var retval = m.slice(1);
if (!retval[0]) retval = retval.slice(1);
return retval;
}
Assume all streets ends with St, Rd or Ave.
var address = /[0-9]/.match(string.charAt(0))
? string.split(" ") : [ " "
].concat(string.split(" "));
This is not particularly robust, but it accounts for the two enumerated cases and is concise at only one line.
I've got a similar problem I'm trying to solve. It seems that if you look for the first space to the right of the house number, you can separate the house number from the street name.
Here in Boston you can have a house number that includes a letter! In addition, I've seen house numbers that include "1/2". Luckily, the 1/2 is preceded by a hyphen, so there aren't any embedded spaces in the house number. I don't know if that's a standard or if I'm just getting lucky.

Categories