Get the matches from specific groups - javascript

I want to validate a String and, after that, get all the matches from some groups.
RegEx:
/^<[A-Za-z0-9]>::=(<[A-Za-z0-9]>)+(\|(<[A-Za-z0-9]>)+)+$/
So, if I get something like <A>::=<B><A>|<Y><A>|<Z> is valid, but if I get something like <A>::=<B>| is false.
There's no problem with the validation, the problem is that I want to take the text inside < and > because I need it later.
So, if I get <exparit>::=<number>|<number><exparit>, then I want to get ["exparit", "number", "number", "exparit"]
My code looks like
// Validation patterns keyed by grammar name.
// BNF: a <x>::= head, one or more <x> terms, then at least one |-separated
// group of <x> terms. Each name between < and > is exactly one alphanumeric
// character because the character class has no quantifier.
Rules = {
"BNF" : /^<[A-Za-z0-9]>::=(<[A-Za-z0-9]>)+(\|(<[A-Za-z0-9]>)+)+$/
};
// Logs 'ok' when bnf matches Rules.BNF and 'no' otherwise; returns nothing.
var checkBNF = function ( bnf ) {
if ( Rules.BNF.test( bnf ) ) {
console.log('ok');
//How to get the text inside < and > ??
}
else {
console.log('no');
}
};
I really appreciate any kind of help, such a book, link, example or the resolution of this problem.
Thanks!

If I can help you a bit, here's something to start with:
http://jsfiddle.net/URLhb/1/
// Sample rule whose symbol names we want to extract.
var test = "<A>::=<B><A>|<Y><A>|<Z>";
var patt0 = /<([a-zA-Z]*)>/g; //Old and bad version
// One or more letters; with the g flag match() returns every run of letters.
var patt1 = /[a-zA-Z]+/g;
// match() returns null when nothing matches, so fall back to an empty list.
var testRE = test.match(patt1) || [];
// Show every match instead of six hard-coded indices, so the snippet keeps
// working when the rule has more or fewer symbols.
for (var k = 0; k < testRE.length; k++) {
  alert(testRE[k]);
}
This code will capture the text inside the < >, but also the < and > symbols with it. I'm trying to fix this, I'll update if I get a better result.
EDIT : Found the issue : I was using a * instead of a + ! It works perfectly now !

What I did was slightly different as I wasn't sure that you would only ever have [a-zA-Z0-9] within the <>
// Validation patterns keyed by grammar name.
// BNF: <name>::= followed by one or more <name> terms and at least one
// |-separated alternative. Names are one or more alphanumeric characters.
var Rules = {
  "BNF" : /^<[A-Za-z0-9]+>::=(<[A-Za-z0-9]+>)+(\|(<[A-Za-z0-9]+>)+)+$/
};

/**
 * Validates a BNF rule and extracts every symbol name it contains.
 *
 * @param {string} bnf - Rule text such as "<A>::=<B><A>|<Z>".
 * @returns {string[]|undefined} The names found between < and >, in order of
 *   appearance (the left-hand side first), or undefined when the rule does
 *   not match Rules.BNF.
 */
var checkBNF = function ( bnf ) {
  if ( !Rules.BNF.test( bnf ) ) {
    console.log('no');
    return undefined;
  }
  console.log('ok');

  // Capture the text between the brackets directly instead of matching the
  // brackets and stripping them afterwards.
  var names = [];
  var namePattern = /<([^>]*)>/g;
  var found;
  while ( ( found = namePattern.exec( bnf ) ) !== null ) {
    names.push( found[1] );
  }
  return names;
};
var test = "<A>::=<B><A>|<Y><A>|<Z>";
var result = checkBNF(test);
console.log(result);
http://jsfiddle.net/UmT3P/

Related

How to sort a complex list in Javascript?

I've been stuck on this question for a while. I have a nested list; its structure looks like
Volume1
Chapter4
Chapter3
Section3-6
Section3-1
Volume2...
...
What I want is to create a sort function to sort volume, chapter, and section.
So, the result may be like
Volume1
Chapter1
Section1-1
....
So I got a complex html, here is the whole html.
I'm not sure how to swap this complex div
I've tried to get element and put them into an array.
// Take the children of the element with id 'volume5015'...
var toSort = document.getElementById('volume5015').children;
// ...and copy them into a real Array so array methods can be used on them.
toSort = Array.prototype.slice.call(toSort, 0);
I made a fiddle for you.
In my code I am trimming the whitespace of the text so that it doesn't get compared, but you don't actually have to if you don't want to.
// The children of #to-sort are the items to sort; #list2 receives the result.
var $items = $("#to-sort").children();
var $resultList = $("#list2");
// Sort the items (and their descendants) and append them to the result list.
sortMarkup($items, $resultList);
/**
 * Sorts a set of elements case-insensitively by their own text, recursively
 * sorting each element's children first, then appends the sorted set to
 * $output.
 *
 * @param {jQuery} $elements - Sibling elements to sort.
 * @param {jQuery} $output - Container that receives the sorted elements.
 */
function sortMarkup($elements, $output) {
  // Sort every element's children exactly once, before ordering the siblings.
  // Doing this inside the comparator repeated the work on every comparison
  // and skipped it entirely for an element with no siblings, because the
  // comparator is never called for a single-item set.
  for (var k = 0; k < $elements.length; k++) {
    var $item = $($elements[k]);
    var $children = $item.children();
    if ($children.length > 0) {
      sortMarkup($children, $item);
    }
  }

  $elements.sort(function(a, b) {
    var contentA = getPureText(a).trim();
    var contentB = getPureText(b).trim();
    var okay = contentA.toUpperCase().localeCompare(contentB.toUpperCase());
    console.log("sorting ["+contentA+"] and ["+contentB+"] returned "+okay);
    return okay;
  });
  $output.append($elements);
}
/**
 * Returns the value of the first text node that is a direct child of elem,
 * ignoring text that belongs to nested elements.
 *
 * @param {Element} elem - Element to read.
 * @returns {string} The first direct text node's value, or "" when the
 *   element has no direct text node (previously this threw a TypeError).
 */
function getPureText(elem) {
  var textNodes = $(elem).contents().filter(function() {
    return this.nodeType === 3; // 3 is the nodeType value checked for text nodes
  });
  return textNodes.length > 0 ? textNodes[0].nodeValue : "";
}
Also, I am using an output list, another thing that is not required. Feel free to modify to your needs!
fiddle: https://jsfiddle.net/vincelord/sn42argq/

Loop through array checking for indexOf's more simple?

Okay, like the title says. I have an array looking like this:
// Greeting words to look for; declared directly as a literal rather than
// creating an empty Array that is immediately replaced.
var hiTriggers = ["hi", "hai", "hello"];
And I'd like to check through it if it finds either of those. I can already achieve this by doing the following:
// Runs the body when "hi" occurs anywhere in message (indexOf returns -1 when absent).
if(message.indexOf("hi") >= 0) {
// do whatever here!
}
But I'm looking for a more efficient way rather than doing 100 if() checks, such as looping through an array like "hiTriggers".
I tried the following:
// Attempt to check message against every entry of hiTriggers.
// Note: i is declared without an initial value, so it starts as undefined and
// the condition i < hiTriggers.length is false on the first check.
for(var i; i < hiTriggers.length; i++) {
console.log(hiTriggers[i]); // simply to know if it checked them through)
if(message.indexOf(hiTriggers[i]) >= 0) {
//do stuff here
}
}
Which sadly did not work as I wanted as it does not check at all.
Thanks in advance and I hope I made sense with my post!
Edit; please note that I have 'messaged' already 'declared' at another place.
It doesn't run because you didn't give the i variable an initial value. It is undefined.
Change to use var i=0;:
// Walk every trigger word, starting from index 0, and report each one that
// occurs somewhere in message.
for (var i=0; i < hiTriggers.length; i++) {
  // Skip triggers that do not occur in the message.
  if (message.indexOf(hiTriggers[i]) < 0) {
    continue;
  }
  //do stuff here
  console.log("found " + hiTriggers[i]);
}
Try using a regular expression to match the message. The \b is a word boundary marker, and the words between the | characters are what is being searched for. If any of the words appear in the message, then message.match will return the array of matches, otherwise null.
// Whole-word, case-insensitive match for any of the greeting words.
var pattern = /\b(Hello|Hi|Hiya)\b/i;
var message = "Hello World";
// Only the yes/no outcome of the match is needed here.
var hasGreeting = pattern.test(message);
if (hasGreeting) {
  console.log("do stuff");
}
You can write even simpler using a for in loop:
// v receives each index of hiTriggers as a string key.
for(var v in hiTriggers){
  // for...in also visits enumerable properties inherited through the
  // prototype chain, so skip anything that is not the array's own entry.
  if (!Object.prototype.hasOwnProperty.call(hiTriggers, v)) {
    continue;
  }
  if(message.indexOf(hiTriggers[v]) >= 0) {
    //do stuff here
    console.log("found " + hiTriggers[v]);
  }
}
The problem is that you have not initialized your var i; make it var i = 0;
You can try forEach loop.
// Visit each trigger word; the callback body runs its work only for words
// that occur in message.
hiTriggers.forEach(function(trigger) {
  var isMissing = message.indexOf(trigger) < 0;
  if (isMissing) {
    return;
  }
  //do sthg here
})

JavaScript Throws Undefined Error

What it is supposed to do -
Example
url1(pages,"ALT") returns "www.xyz.ac.uk"
url1(pages,"xyz") returns ""
The error - TypeError: Cannot call method 'toUpperCase' of undefined
This is just for some coursework, Im stuck with these errors. Any help would be much appreciated
// Returns the position of pattern within string via indexOf (-1 when absent).
// When caseSensitive == false only string is upper-cased; pattern is used as
// given. Throws if string is undefined, since toUpperCase is called on it.
function index(string,pattern,caseSensitive) {
if(caseSensitive == false) {
var v = string.toUpperCase();
} else {
var v = string;
}
// indexNumber is assigned without a declaration, so it is not a local variable.
return indexNumber = v.indexOf(pattern);
}
// Sample data: in each entry the first character is reused as the separator
// between the URL and the description.
var pages = [ "|www.lboro.ac.uk|Loughborough University offers degree programmes and world class research.", "!www.xyz.ac.uk!An alternative University" , "%www%Yet another University"];
// Show the result of url1 for the pattern "ALT".
alert(url1(pages, "ALT"));
// Intended to return the URL part of a page whose description contains
// pattern, or "" otherwise.
function url1(pages,pattern) {
var siteContent = [];
// i is assigned without var, so it is not local to this function.
for(i=0;i<pages.length;i++) {
// The first character of the entry is treated as its separator.
var seperator = pages[i].charAt(0);
if(pages[i].indexOf(seperator)>0){
// split() is given the numeric result of indexOf here, not the separator itself.
siteContent = pages[i].split(pages[i].indexOf(seperator));
}
// Both branches return, so the loop never reaches a second entry.
if( index(siteContent[2],pattern,false)>=0){
return siteContent[1];
}else{
return "";
}
}
}
if(pages[i].indexOf(seperator)>0){
siteContent = pages[i].split(pages[i].indexOf(seperator));
}
if( index(siteContent[2],pattern,false)>=0){
return siteContent[1];
}else{
return "";
}
If pages[i].indexOf(seperator)<=0, siteContent is still whatever it was from the last iteration. If that happens on the first iteration, siteContent is still [], and siteContent[2] is undefined.
Another problem: the expression pages[i].indexOf(seperator) returns a number, and pages[i].split expects a delimiting string as an argument. Since the number doesn't appear in your input, you'll always get a single-element array, and siteContent[2] will always be undefined. Get rid of .indexOf(seperator), change it to siteContent = pages[i].split(seperator).
One more: get rid of the else { return ""; }. Add a return ""; after the for loop.
Finally, in the first if statement condition, change .indexOf(seperator) > 0 to .indexOf(seperator, 1) !== -1. Since you're getting seperator from the first character of the string, it will be found at 0. You want the second occurrence, so start the search at 1. In addition, .indexOf returns -1 if it doesn't find the substring. You'll need to account for this in both if conditions.
Side note, as this is not causing your problem: never use == false. JS will coerce stuff like 0 and "" to == false. If that's what you want, just use the ! operator, because the expression has nothing to do with the value false.
My final answer is http://jsfiddle.net/QF237/
Right here:
alert(url1(pages, ALT)); // ALT ISN'T DEFINED
I believe you forgot to quote it:
alert(url1(pages, "ALT"));
You should split the string passing the separator character itself. Your function then will look like:
/**
 * Finds the first page whose description contains pattern (case-insensitive)
 * and returns its URL part.
 *
 * Each entry looks like "<sep>url<sep>description", where <sep> is the
 * entry's own first character.
 *
 * @param {string[]} pages - Entries to search.
 * @param {string} pattern - Text to look for in the description.
 * @returns {string} The URL of the first matching page, or "" if none match.
 */
function url1(pages,pattern) {
  // i is declared locally so the loop does not leak a global counter.
  for (var i = 0; i < pages.length; i++) {
    var seperator = pages[i].charAt(0);
    console.log(seperator);
    // The first character always occurs at index 0, so the split needs no guard.
    var siteContent = pages[i].split(seperator); //fixed here
    console.log(siteContent);
    // Entries without a description part are skipped instead of passing
    // undefined to index().
    if (siteContent.length > 2 && index(siteContent[2],pattern,false) >= 0) {
      return siteContent[1];
    }
  }
  // Only give up after every page has been checked.
  return "";
}
Tell us if it worked, please.
EDIT: It seems your index() also has a little problem. Please try the function below.
/**
 * Returns the position of pattern inside string.
 *
 * @param {string} string - Text to search in.
 * @param {string} pattern - Text to search for.
 * @param {boolean} caseSensitive - Pass false to ignore case.
 * @returns {number} Zero-based index of the first occurrence, or -1 when the
 *   pattern is absent or either argument is null/undefined.
 */
function index(string,pattern,caseSensitive) {
  // A missing string or pattern cannot match; report "not found" instead of
  // throwing "Cannot call method 'toUpperCase' of undefined".
  if (string == null || pattern == null) {
    return -1;
  }
  // Loose comparison kept on purpose so existing callers passing 0 or ""
  // still select the case-insensitive branch.
  if (caseSensitive == false) {
    //to clarify: pattern should be uppercased also if caseSensitiveness is false
    return string.toUpperCase().indexOf(pattern.toUpperCase());
  }
  return string.indexOf(pattern);
}
EDIT 2:
And url1() is finally like this:
/**
 * Finds the first page whose description contains pattern (case-insensitive)
 * and returns its URL part.
 *
 * Each entry looks like "<sep>url<sep>description", where <sep> is the
 * entry's own first character.
 *
 * @param {string[]} pages - Entries to search.
 * @param {string} pattern - Text to look for in the description.
 * @returns {string} The URL of the first matching page, or "" if none match.
 */
function url1(pages,pattern) {
  // i is declared locally so the loop does not leak a global counter.
  for (var i = 0; i < pages.length; i++) {
    var seperator = pages[i].charAt(0);
    // Split fresh on every iteration so a malformed entry can never reuse
    // the parts of the previous page.
    var siteContent = pages[i].split(seperator);
    // Entries without a description part cannot match; skip them rather
    // than handing undefined to index().
    if (siteContent.length < 3) {
      continue;
    }
    if (index(siteContent[2],pattern,false) >= 0) {
      return siteContent[1];
    }
  }
  return "";
}
In this case, the first occurrence of pattern in all pages will be returned.

Confusing error with an if statement

I have a Greasemonkey script that keeps track of different things on unicreatures.com.
One of the things I wanted to count was clicks on some links on the page, but not all links.
These need counted,
http://unicreatures.com/explore.php?area=sea&id=89&key=bf12
These should not be counted,
http://unicreatures.com/explore.php?area=sea&gather=5&enc=394844&r=
Someone helped me figure out a regexp that did what I wanted, but I had to code each different explore location (area=**) into it, so I decided that wouldn't work.
The regexp version
// Attach a click counter to every link on the page whose href contains
// "area=sea" not immediately followed by "&gather".
var links = document.getElementsByTagName( 'a' );
for ( var i = 0; i < links.length; i++ ) {
var link = links[i];
if ( /area=sea(?!\&gather)/.test( link.href )) {
link.addEventListener( 'click', function () {
// Increment the stored step count on each click.
localStorage.steps=Number(localStorage.steps)+1
// alert(localStorage.steps + ' in Sargasso' );
}, true );
}
}
Obviously I don't want a billion if statements for the different values of area=, and I couldn't find a way to add a variable to a regexp.
So I finally found some string manipulation commands and put together this:
// loc is the text between the first "=" and the first "&" of the current
// page's address.
var url = window.location.href;
var startOf=url.indexOf("=")+1;
var endOf=url.indexOf("&");
var loc =url.substring(startOf,endOf);
var links = document.getElementsByTagName( 'a' );
for ( var i = 0; i < links.length; i++ ) {
var link = links[i];
// Note: this condition inspects url (the page address), not link.href, so it
// evaluates the same way for every link on the page.
if (url.indexOf("area=")>=0 && url.indexOf("gather=")<0) {
link.addEventListener( 'click', function () {
// Increment the overall counter and the per-location counter.
localStorage.steps=Number(localStorage.steps)+1
localStorage[loc+"Steps"]=Number(localStorage[loc+"Steps"])+1
alert(localStorage[loc+"Steps"] +" in local"+loc);
}, true );
}
}
For some reason it counts even when the second condition is false. Is this a simple case of me getting the syntax wrong somewhere, or is this a Greasemonkey bug? I don't get any errors in the console.
Try just to tweak your regexp version, instead of this:
if ( /area=sea(?!\&gather)/.test( link.href )) {
use this regex:
if ( /area=(\w*)&id=/.test( link.href )) {
that will match all links that have an 'area' parameter followed by an 'id' parameter, which seems enough to match the links you want.
Prefilter the links and test the link.href, not url -- which was set to the page's address by previous code.
// Only links whose href contains "area=" are candidates.
var linksWithArea = document.querySelectorAll ("a[href*='area=']");
for (var J = linksWithArea.length - 1; J >= 0; --J) {
  var link = linksWithArea[J];
  // Gather links must not be counted; test the link's own href.
  if ( ! /gather=/i.test (link.href) ) {
    link.addEventListener ('click', function () {
      // A counter that was never stored reads back as undefined, and
      // Number(undefined) is NaN; fall back to 0 so the first click yields 1
      // instead of NaN.
      localStorage.steps = (Number(localStorage.steps) || 0) + 1;
      // loc is expected to have been set by the earlier code.
      localStorage[loc+"Steps"] = (Number(localStorage[loc+"Steps"]) || 0) + 1;
      alert(localStorage[loc+"Steps"] +" in local"+loc);
    }, true );
  }
}

Converting html to textual representation with preserved whitespace meaning of tags -- how?

Consider such html piece:
<p>foo</p><p>bar</p>
If you run (for example) jQuery text for it you will get "foobar" -- so it is raw text actually, not textual representation.
I am looking for some ready to use library to get textual representation, in this case it should be -- "foo\nbar". Or clever hints how to make this as easy as possible ;-).
NOTE: I am not looking for beautiful output text, but just preserved meaning of whitespaces, so for:
<tr><td>foo</td><td>bar</td></tr>
<tr><td>1</td><td>2</td></tr>
I will be happy with
foo bar
1 2
it does NOT have to be:
foo bar
1 2
(but of course no harm done).
Have you looked at the innerText or textContent properties?
/**
 * Returns the element's text, preferring innerText and falling back to
 * textContent; yields "" when neither holds a non-empty value.
 */
function getText(element){
  return element.innerText || element.textContent || "";
}
Example
Adds a PRE tag to the body and appends the body text.
// Append a PRE element to the body, then fill it with the body's text.
(function () {
  var pre = document.createElement('pre');
  // The PRE is attached first, then the body text is read, in that order.
  document.body.appendChild(pre);
  var bodyText = getText(document.body);
  pre.appendChild(document.createTextNode(bodyText));
})();
Edit
Does using a range work with firefox?
// Select the whole body with a Range and log the range's string form.
var r = document.createRange();
r.selectNode(document.body);
console.log(r.toString());
Edit
It looks like you're stuck with a parsing function like this then.
/**
 * Builds a plain-text rendering of a DOM subtree in which row/paragraph/break
 * tags start a new line and table cells are preceded by a tab.
 *
 * @param {Node} element - Node whose childNodes are rendered.
 * @returns {string} Text of all descendants; the contents of iframe,
 *   noscript, script and style nodes are skipped.
 */
var parse = function(element){
  var s = "";
  for (var i = 0; i < element.childNodes.length; i++) {
    var child = element.childNodes[i];
    var name = child.nodeName;

    if (/^(iframe|noscript|script|style)$/i.test(name)) {
      continue;
    }
    if (/^(tr|br|p|hr)$/i.test(name)) {
      s += '\n';
    } else if (/^(td|th)$/i.test(name)) {
      // The i flag is needed here as well: without it an upper-case
      // nodeName such as "TD" never matched and cells ran together.
      s += '\t';
    }

    if (child.nodeType === 3) {
      // Strip every line break in the text node, not just the first run.
      s += child.nodeValue.replace(/[\r\n]+/g, "");
    } else {
      s += parse(child);
    }
  }
  return s;
};
console.log(parse(document.body));
I started writing my own function probably at the same time as Zapthedingbat, so just for the record:
// Numeric nodeType values used below.
var NodeTypeEnum = { Element : 1,Attribute : 2, Text: 3, Comment :8,Document :9};

/**
 * Recursively converts a node into text, wrapping block-like elements in
 * newlines and table cells in tabs.
 *
 * @param {Node} elem - Node to convert.
 * @returns {string} The node's text; "" for node types other than text,
 *   element and document.
 */
function doTextualRepresentation(elem)
{
  if (elem.nodeType === NodeTypeEnum.Text) {
    return elem.nodeValue;
  }
  if (elem.nodeType !== NodeTypeEnum.Element && elem.nodeType !== NodeTypeEnum.Document) {
    return "";
  }

  var s = "";
  var child = elem.firstChild;
  while (child != null)
  {
    s += doTextualRepresentation(child);
    child = child.nextSibling;
  }

  if (['P','DIV','TABLE','TR','BR','HR'].indexOf(elem.tagName) > -1) {
    s = "\n"+s+"\n";
  } else if (['TD','TH'].indexOf(elem.tagName) > -1) {
    // Cells: TD and TH. The original listed 'TR' here, which could never be
    // reached because 'TR' is already handled above, and left header cells
    // unseparated.
    s = "\t"+s+"\t";
  }
  return s;
}
/**
 * Returns the textual representation of elem with whitespace tidied up:
 * any whitespace following a newline is dropped, and runs of two or more
 * tabs are collapsed to one.
 */
function TextualRepresentation(elem)
{
  var raw = doTextualRepresentation(elem);
  var withoutLeadingSpace = raw.replace(/\n[\s]+/g,"\n");
  return withoutLeadingSpace.replace(/\t{2,}/g,"\t");
}
One thing I am surprised with -- I couldn't get
for (var child in elem.childNodes)
working, and it is a pity, because I spend most time in C# and I like this syntax, theoretically it should work in JS, but it doesn't.

Categories