I want to retrieve inside an array all the elements who match multiple strings (all of them & not necessary words): like a search engine returning all results matching term_searched#1 && term_searched#2.
It's not a question about duplicates in the array (there's none), but about searching for a conjunction of elements: traditionally, the search is for one element, by himself or in disjunction with others (a|b|c). Just want to search (a && b && c).
I tried:
indexOf() : I can work only with one element to locate in the array.
match() : there is no AND operator in a regex expression (only | - sadly, it would be so simple). So I tried to inject these regex expressions
/(?=element1).*(?=element2)/gim
/(?=element1)(?=element2)/gim see here
The first regex expression works, but not at every time: seems very fragile...
So I don't know if I'm in the good direction (match) or if I can't figure what is the right regex expression... Need your advices.
// filter grid by searching on 'input' event
'input #search': (e)=> {
var keypressed = e.currentTarget.value;
// create array on 'space' input
var keyarr = keypressed.toLowerCase().split(" ");
// format each array's element into regex expression
var keyarrReg = [];
for(i = 0; i < keyarr.length; i++) {
var reg = '(?=' + keyarr[i] + ')';
keyarrReg.push(reg);
}
// array to regex string into '/(?=element1).*(?=element2)/gim' format
var searching = new RegExp(keyarrReg.join(".*"), 'mgi');
// set grid
var grid = new Muuri('#gridre', {
layout: {
fillGaps: true,
}
});
if (keypressed) {
// filter all grid's items (grid of items is an array)
grid.filter(function (item) {
var searchoperator = item.getElement().textContent.toLowerCase().match(searching);
// get items + only their text + lower case their text + return true (not false) in the value ('keypressed') is found in them
//var searchoperator = item.getElement().textContent.toLowerCase().indexOf(keypressed.toLowerCase()) != -1;
return searchoperator;
}
[....]
}
}
Edit with Gawil's answer adapted to my initial code (to help if needed)
// filter grid by searching on 'input' event
'input #search': (e)=> {
var keypressed = e.currentTarget.value;
// create array on 'space' input
var keyarr = keypressed.toLowerCase().split(" ");
// convert the array to a regex string, in a '^(?=.*word1)(?=.*word2).*$' format
// here is Gawil's answer, formatted by Teemu
var searching = new RegExp('^(?=.*' + keyarr.join(')(?=.*') + ').*$', 'm');
// set grid
var grid = new Muuri('#gridre', {
layout: {
fillGaps: true,
}
});
if (keypressed) {
// filter all grid's items (grid of items is an array)
grid.filter(function (item) {
// get items + only their text + lower case their text + delete space between paragraphs
var searchraw = item.getElement().textContent.toLowerCase().replace(/\r\n|\n|\r/gm,' ');
var searchoperator = searchraw.match(searching);
return searchoperator;
}
[....]
}
}
The code bellow will log each element of the array containing words cats and dogs.
It uses the regex ^(?=.*word1)(?=.*word2).*$To handle new lines, use this one instead :
^(?=(?:.|\n)*word1)(?=(?:.|\n)*word2).*$
You can add as many words as you want following the same logic, and it does not take order of the words in count.
It is very similar to what you tried, except that you have to do all (?=) checks before matching the string. Indeed, your first regex works only when the words are in the right order (element1 and then element2). Your second regex almost works, but you wrote only lookaheads, so it checks the presence of each word, but won't match anything.
var words = ["cats", "dog"]
var array = [
"this is a string",
"a string with the word cats",
"a string with the word dogs",
"a string with both words cats and dogs",
"cats rule everything",
"dogs rule cats",
"this line is for dog\nbut cats prefer this one"
]
var regexString = "^";
words.forEach(function(word) { regexString += ("(?=(?:.|\n)*"+word+")"); });
var regex = new RegExp(regexString);
array.forEach(function(str) { // Loop through the array
if(str.match(regex)) {
console.log(str); // Display if words have been found
}
});
If I've correctly understood your question, you've an array of strings, and some keywords, which have to be found from every index in the array to be accepted in the search results.
You can use a "whitelist", i.e. a regExp where the keywords are separated with |. Then iterate through the array, and on every member create an array of matches against the whitelist. Remove the duplicates from the matches array, and check, that all the keywords are in the list simply by comparing the length of the matches array to the count of the keywords. Like so:
function searchAll (arr, keywords) {
var txt = keywords.split(' '),
len = txt.length,
regex = new RegExp(txt.join('|'), 'gi'), // A pipe separated whitelist
hits; // The final results to return, an array containing the contents of the matched members
// Create an array of the rows matching all the keywords
hits = arr.filter(function (row) {
var res = row.match(regex), // An array of matched keywords
final, temp;
if (!res) {return false;}
// Remove the dups from the matches array
temp = {}; // Temporary store for the found keywords
final = res.filter(function (match) {
if (!temp[match]) {
// Add the found keyword to store, and accept the keyword to the final array
return temp[match] = true;
}
return false;
});
// Return matches count compared to keywords count to make sure all the keywords were found
return final.length === len;
});
return hits;
}
var txt = "Some text including a couple of numbers like 8 and 9. More text to retrieve, also containing some numbers 7, 8, 8, 8 and 9",
arr = txt.split('.'),
searchBut = document.getElementById('search');
searchBut.addEventListener('change', function (e) {
var hits = searchAll(arr, e.target.value);
console.log(hits);
});
<input id="search">
The advantage of the whitelist is, that you don't have to know the exact order of the keywords in the text, and the text can contain any characters.
Related
I have seen similar questions to this but none that are trying to achieve quite the same thing. I need to find every instance of a regex pattern but replace every instance with a different value. Here is my code:
function replacingText(){
var names = ["Ethan", "Kyle", "Chase", "Cole"];
var sentance = 'This is [Cole] and [Chase].'
var regex = /\[(.*?)\]/gm;
for(i of names){
sentance = sentance.replace(regex, i);
}
console.log(sentance);
}
This code results in this:
This is Ethan and Ethan.
But I want:
This is Ethan and Kyle.
Really, I just need some way to find each item that is inside of brackets [ ] and replace that item with a unique value and then rebuild the string with the new values. I am not partial to any approach.
Try remove g from regex. It helps
If seems like you want to find a match, and replace with the first string in the array, and for the next match, then replace it with the second string, etc? You also have to consider the case, what if that array runs out of items.
In the original code, you are replacing everything with the first match and the [ and ] are gone. So you need to replace one, and then go onto the next one:
function replacingText() {
var names = ["Ethan", "Kyle", "Chase", "Cole"];
var sentance = 'This is [Cole] and [Chase].'
var regex = /\[(.*?)\]/m;
i = 0;
while (regex.test(sentance) && i < names.length) {
sentance = sentance.replace(regex, names[i]);
i++;
}
console.log(sentance);
}
replacingText();
Your original code works too, if you replace one occurrence only:
function replacingText() {
var names = ["Ethan", "Kyle", "Chase", "Cole"];
var sentance = 'This is [Cole] and [Chase].'
var regex = /\[(.*?)\]/m;
for (i of names) {
sentance = sentance.replace(regex, i);
}
console.log(sentance);
}
replacingText();
But then, note that if you have 20 or 200 items in the array, then it will run through the array for every item, even if there is no match.
You can pass a function to .replace(), the function should return the replacement value:
function replacingText(){
var names = ["Ethan", "Kyle", "Chase", "Cole"];
var sentance = 'This is [Cole] and [Chase].'
sentance.replace(/\[(.*?)\]/g, function() {
return names.shift();
});
}
I am trying to build a search function where the search is inclusive of all the words in the search. For example if I had 3 items:
The Red House
The Red Car
Home Garage for your car
In the search box (Which currently is dynamic as you type), if you type Red you would get the the first two above. If I type Red, Car, I should only get line two, The Red Car, because that's the only one that has BOTH red and car in it.
I tried building an array of the search items then I have this for the search box:
$scope.search = function(item) {
var str = $scope.searchText;
if(!str || str === undefined) {
return true;
}
if (!item || item === undefined) return false;
var arr = str.toString().split(' ');
console.log('ARRAY - ' + arr);
var found = true;
arr.forEach(function(element) {
console.log ( "Element " + element.toString()) ;
if(item.tags.toString().toLowerCase().indexOf(element.toLowerCase()) >= 0 ) {
console.log ("Tags " + item.tags.toString()) ;
found = true;
return true;
}
found = false;
return false;
});
return found;
Right now with that code it's only giving me the results to the last word in my search... And if I delete and add words it doesn't seem to respond correctly.
Not sure how far off I am on this.
You should use Array.prototype.every to check that every word passes the test. You may also extract the lowercase tags before looping, so that it's not done on every loop unnecessarily:
const lowerTags = item.tags.toString().toLowerCase();
const arr = str.toString().split(' ');
const haveAll = arr.every(wordToFind => lowerTags.includes(wordToFind.toLowerCase()));
return haveAll;
If str is already a string, there's no need to call toString() on it (and similarly for item.tags) - might make the code more readable.
The following script contains an array of your searchable strings. When the function Search is called, it accepts a string as an argument. That string is split into array elements for the various words in the string. Then the function loops through each search term and compares it to each searchable item. If the item and term match, the term is pushed into a results array, which the function will return for you to use as you see fit.
<script>
searchableItems = [
"The Red House",
"The Red Car",
"Home Garage For Your Car"
];
Search("Red");
function Search(searchTerm){
results = [];
searchWords = searchTerm.split();
i = 0;
while(i < searchWords.length){
j = 0;
while(j < searchableItems.length){
if(searchableItems[j].indexOf(searchWords[i]) > -1){
results.push(searchableItems[j]);
}
j++;
}
i++;
}
return results;
}
</script>
i have a bug in this code that i cannot seem to solve. if there is only 1 instance of Act, it works as it should. But when there is more than 1 instance of Act, it breaks. Not sure what I am missing here.
//Find all instances of italics
var findItalics = new RegExp(/(<em>.*?<\/em>)/g);
var italicsArray = [];
var italicCount;
while (italicCount = findItalics.exec(searchInput)) {
italicsArray.push(italicCount[0]);
}
//Find the italics containing the word 'Act'
var keywordItalics = new RegExp(/<em>.*?(Act).*?<\/em>/g);
var keywordItalicArray = [];
var italicCountKeyword;
while (italicCountKeyword = keywordItalics.exec(italicsArray)) {
keywordItalicArray.push(italicCountKeyword[0]);
}
//Remove all instances of the keyword(s)
for (var tlcs = italicsArray.length - 1; tlcs >= 0; tlcs--) {
if(italicsArray[tlcs] == keywordItalicArray) {
italicsArray.splice(tlcs, 1);
}
}
Thanks to #artgb who helped me rethink this.
//Find all instances of italics
var findItalics = new RegExp(/(<em>.*?<\/em>)/g);
var italicsArray = [];
var italicCount;
while (italicCount = findItalics.exec(searchInput)) {
italicsArray.push(italicCount[0]);
}
//Find the italics containing the word 'Act'
var keywordItalics = new RegExp(/<em>.*?(Act).*?<\/em>/g);
var keywordItalicArray = [];
var italicCountKeyword;
while (italicCountKeyword = keywordItalics.exec(searchInput)) {
keywordItalicArray.push(italicCountKeyword[0]);
}
//Remove all instances of the keyword(s)
for(var xXx = 0; xXx < keywordItalicArray.length; xXx++){
for (var tlcs = italicsArray.length - 1; tlcs >= 0; tlcs--) {
if(italicsArray[tlcs] == keywordItalicArray[xXx]) {
italicsArray.splice(tlcs, 1);
}
}
}
var keywordItalics = new RegExp(/<em>.*?(Act).*?<\/em>/g);
Should usually be shortened to:
var keywordItalics = /<em>.*?(Act).*?<\/em>/g;
Where your () are, this would only get a capture of "Act", so to capture whole string in the em, it should be:
var keywordItalics = /<em>(.*?Act.*?)<\/em>/g;
However, a faster way (without regexp) you could get an array of all the emphasized tags just by:
var keywordItalics = document.getElementsByTagName('em');
If you're just trying to get rid of all em's containing "Act", all you should need is:
document.body.innerHTML = document.body.innerHTML.replace(
/<em>.*?Act.*?<\/em>/g,
''
);
This should remove all traces of em's containing "Act" in the document (effectively replacing those strings with an empty string, aka nothing). It will cause a reflow, however. If they are inside a containing element besides body, would be better to get containing element first (instead of using body). There are "better" ways of doing this, but this is probably simplest coding-wise.
Update: an easy way to remove em's with "Act" from the array would be:
italicsArray = italicsArray
.join('_SEP_') // Convert to string
.replace(/<em>.*?Act.*?<\/em>/g,'') // Delete matched entries
.replace(/(_SEP_)+/g,'_SEP_') // Collapse multiple seperators
.split('_SEP_') // Convert back to array
;
This basically uses a seperator _SEP_ (to avoid collisions with strings containing ',') and turns the array into a string, deletes all matches to your regexp, removes what would become undefined entries, and recreates the array in the same name.
My problem is as follows: I am trying to take data as formatted in the 'names' variable in the snippet below, convert the string to array, then reorganize the array so the text is in the correct order. I am able to get the pieces I have put together to properly sort the first or last instance of a first & last name, but am seeking guidance on how to go about processing multiple names. The snippet below will return the last instance of the first & last name in the correct order. At this point, I am only looking to have the data returned as a properly sorted array, e.g.
if the input string is
names = "Bond, James & Banner, Bruce";
once processed should return: ['James', 'Bond,', '&', 'Bruce', 'Banner,']
As always I appreciate all the help I can get, thanks in advance!
Array.prototype.move = function(from,to){
this.splice(to,0,this.splice(from,1)[0]);
return this;
};
var names ="Bond, James & Banner, Bruce";
var namesArr = names.split(' ');
var idx;
// search for a comma (only last names have commas with them)
for(var i = 0; i < namesArr.length; i++) {
if(namesArr[i].indexOf(',') != -1) {
idx = i;
}
}
namesArr.move(idx, idx+1);
console.log(namesArr);
You were close but this solution should work for you. Basically you need to update in the loop and increment the index i to account for the switch. Otherwise you will end up revisiting the first last name you switch.
Array.prototype.move = function(from,to){
this.splice(to,0,this.splice(from,1)[0]);
return this;
};
var names ="Bond, James & Banner, Bruce & Guy, Other";
var namesArr = names.split(' ');
var idx;
// search for a comma (only last names have commas with them)
for(var i = 0; i < namesArr.length; i++) {
if(namesArr[i].indexOf(',') != -1) {
namesArr.move(i, i+1);
i++;
}
}
console.log(namesArr);
Another solution could be by using String.prototype.match() and a regular expression \w+ to match the names:
var names = "Bond, James & Banner, Bruce & Licoln, Anna"; // ...
var arr_names = names.match(/\w+/g); // Match names
var res = [];
for (var i = 0; i < arr_names.length; i += 2) { // Step by 2
res.push(arr_names[i + 1]); // Push the name before
res.push(arr_names[i]); // Push the current name
res.push("&"); // Add "&"
}
res.splice((res.length - 1), 1); // Remove last "&"
console.log(res);
I'm trying to create a code that will take a sentence as a param, split that sentence into an array of words and then create a loop that checks if any of theses word matches a word in some other arrays.
In the example below, I have a sentence that contains the word "ski". This means that the return value should be categories.type3.
How can I have make the loop check this? Could I have a function switching between different categories ? (ie : if a word is not in action, look in adventure and so on).
var categories = {
type1: "action",
type2: "adventure",
type3: "sport"
}
var Sentence = "This sentence contains the word ski";
var sport = ["soccer", "tennis", "Ski"];
var action = ["weapon", "explosions"];
var adventure = ["puzzle", "exploring"];
var myFreeFunc = function (Sentence) {
for (var i = 0; i < arrayLength; i++) {
if (typeArr[i] == word) {
}
}
}
You appear to want to know which categories match the sentence.
To start with, get rid of the meaningless type1 etc identifiers and re-arrange your fixed data into objects that directly represent the required data, specifically a Map of key/value pairs, where each key is a "category" name, and each value is a Set of keywords associated with that category:
var categories = new Map([
['action', new Set(['weapon', 'explosions'])],
['adventure', new Set(['puzzle', 'exploring'])],
['sport', new Set(['soccer', 'tennis', 'ski'])]
]);
[NB: Set and Map are new ES6 features. Polyfills are available]
You now have the ability to iterate over the categories map to get the list of categories, and over the contents of each category to find the key words:
function getCategories(sentence) {
var result = new Set();
var words = new Set(sentence.toLowerCase().split(/\b/g)); /* "/b" for word boundary */
categories.forEach(function(wordset, category) {
wordset.forEach(function(word) {
if (words.has(word)) {
result.add(category);
}
});
});
return result.values(); // NB: Iterator interface
}
NB: I've avoided for .. of because it's not possible to polyfill that, whereas Set.prototype.forEach and Map.prototype.forEach can be.
I would rewrite the code (you should always combine var statements).
I've added a small fiddle snippet, how i would rewrite the function. Just as an example, how you could iterate your data. Of course you should check out the other posts to optimise this code snipped ( e.g. fix for multiple spaces! ).
// make sure, your dictionary contains lower case words
var categories = {
action: ["soccer", "tennis", "ski"],
adventure: ["weapon", "explosions"],
sport: ["puzzle", "exploring"]
}
var myFreeFunc = function myFreeFunc(Sentence) {
// iterates over all keys on the categories object
for (var key in categories) {
// convert the sentence to lower case and split it on spaces
var words = Sentence.toLowerCase().split(' ');
// iterates the positions of the words-array
for (var wordIdx in words)
{
// output debug infos
console.log('test:', words[wordIdx], categories[key], categories[key].indexOf(words[wordIdx]) != -1, '('+categories[key].indexOf(words[wordIdx])+')');
// lets the array function 'indexOf' check for the word on position wordIdx in the words-array
if (categories[key].indexOf(words[wordIdx]) != -1 ) {
// output the found key
console.log('found', key);
// return the found key and stop searching by leaving the function
return key;
}
}//-for words
}//-for categories
// nothing found while iterating categories with all words
return null;
}
stripped down the function part snippet (no comments, no extra spaces, no console.log):
var myFreeFunc = function myFreeFunc(Sentence) {
for (var key in categories) {
var words = Sentence.toLowerCase().split(' ');
for (var wordIdx in words)
{
if (categories[key].indexOf(words[wordIdx]) != -1 ) {
return key;
}
}
}
return null;
}
Accumulated the topics covered in the comments
check if the Object really owns the property: obj.hasOwnProperty(prop)
split string by word bounds, as mentioned by Alnitak (using RegExp): /\b/g
collecting categories for multiple matching
Snippet:
var myFreeFunc = function myFreeFunc(Sentence) {
var result = []; // collection of results.
for (var key in categories) {
if (categories.hasOwnProperty(key)) { // check if it really is an owned key
var words = Sentence.toLowerCase().split(/\b/g); // splitting on word bounds
for (var wordIdx in words)
{
if (categories[key].indexOf(words[wordIdx]) != -1 ) {
result.push(key);
}
}
}
}
return result;
}
One simple way would be to do like this :
function determineCategory(word){
var dictionnary = {
// I assume here you don't need category1 and such
action: ["weapon", "explosions"],
aventure: ["puzzle", "exploring"],
sport: ["soccer", "tennis", "ski"]
}
var categories = Object.keys(dictionnary);
for(var i = 0; i<categories.length; i++){
for(var j = 0; j<categories[i].length;j++){
var wordCompared = dictionnary[categories[i]][j];
if(wordCompared == word){
return categories[i];
}
}
}
return "not found";
}
var sentence = "This sentence contains the word ski";
var words = sentence.split(" "); // simple separation into words
var result = [];
for(var i=0; i<words.length; i++){
result[i] = determineCategory(words[i]);
}
A few notes on this approach :
it needs you to change your existing structure (I don't know if its possible)
it doesn't do much for your sentence splitting (just using the white space). For more clever approach, see Alnitak's answer, or look for tokenization/lemmatization methods.
it is up to you to determine what to do when a word doesn't belong to a category (right now, it just stores "not found".