The function is supposed to take a string and switch the case of every second character. For example:
input: 'HelloWorld' output: 'HElLowoRlD'
input: 'abcdefg' output: 'aBcDeFg'
input: 'TONYmontana' output: 'ToNymOnTaNa'
My function doesn't work, why?
/**
 * Attempts to flip the case of every second character (odd indexes) by
 * assigning to `text[index]`, then returns `text`.
 *
 * Note: this is the version the question asks about. Index assignment only
 * has an effect on mutable containers (e.g. an array of characters); a
 * primitive string is immutable, so the string itself is never changed.
 *
 * @param {string|string[]} text - characters to process.
 * @returns {string|string[]} the same `text` value that was passed in.
 */
function switchCase(text) {
  // Odd indexes only: 1, 3, 5, ...
  for (let pos = 1; pos < text.length; pos += 2) {
    const current = text[pos];
    const isLowerCase = current === current.toLowerCase();
    text[pos] = isLowerCase ? current.toUpperCase() : current.toLowerCase();
  }
  return text;
}
You should store the new value in a string and return that from the function:
/**
 * Builds a new string in which every second character (odd indexes) has its
 * case switched; characters at even indexes are copied unchanged.
 *
 * @param {string} text - input string.
 * @returns {string} the newly built string.
 */
function secondCase(text) {
  let result = '';
  for (let pos = 0; pos < text.length; pos += 1) {
    const current = text[pos];
    if (pos % 2 === 0) {
      // Even index: keep the character as it is.
      result += current;
      continue;
    }
    // Odd index: lower-case becomes upper-case and vice versa.
    result += current === current.toLowerCase()
      ? current.toUpperCase()
      : current.toLowerCase();
  }
  return result;
}
console.log(secondCase('HelloWorld'));
console.log(secondCase('abcdefg'));
console.log(secondCase('TONYmontana'));
Because javascript strings are immutable, so code like text[i] = 'a' doesn't work.
You can convert string to array to do what you want, eg:
/**
 * Switches the case of a single character.
 *
 * @param {string} c - the character to toggle.
 * @returns {string} upper-case form when `c` equals its lower-case form,
 *   otherwise the lower-case form.
 */
function toggleCase(c) {
  const lowered = c.toLowerCase();
  if (c === lowered) {
    return c.toUpperCase();
  }
  return lowered;
}
/**
 * Switches the case of every second character (odd indexes) by working on an
 * array of characters and joining it back into a string.
 *
 * @param {string} text - input string.
 * @returns {string} new string with the odd-index characters toggled.
 */
function secondCase(text) {
  const chars = text.split('');
  const switched = chars.map((c, i) => (i % 2 === 0 ? c : toggleCase(c)));
  return switched.join('');
}
In JavaScript, strings are immutable, so you can handle your case as shown below:
/**
 * Returns a copy of `str` in which the character at `index` is replaced
 * by `chr`. The original string is not modified.
 *
 * @param {string} str - source string.
 * @param {number} index - position of the character to replace.
 * @param {string} chr - replacement text.
 * @returns {string} the rebuilt string.
 */
function setCharAt(str, index, chr) {
  const head = str.substring(0, index);
  const tail = str.substring(index + 1);
  return head + chr + tail;
}
/**
 * Switches the case of every second character (odd indexes). Because strings
 * cannot be changed in place, each modification rebuilds the string through
 * setCharAt and rebinds the local `text`.
 *
 * @param {string} text - input string.
 * @returns {string} string with the odd-index characters toggled.
 */
function switchCase(text) {
  for (let pos = 1; pos < text.length; pos += 2) {
    const current = text[pos];
    const flipped = current === current.toLowerCase()
      ? current.toUpperCase()
      : current.toLowerCase();
    text = setCharAt(text, pos, flipped);
  }
  return text;
}
The string should be written to a new string because strings are immutable. Alternative approach: map and join an array representation of the word:
/**
 * Upper-cases the characters at odd indexes and lower-cases the characters
 * at even indexes (note: this normalises case rather than toggling it).
 *
 * @param {string} word - input string.
 * @returns {string} the re-cased string.
 */
const ucEverySecondChr = word => {
  const letters = [...word];
  const recased = letters.map((chr, idx) =>
    (idx % 2 === 1 ? chr.toUpperCase() : chr.toLowerCase()));
  return recased.join('');
};
console.log(ucEverySecondChr("helloworld"));
console.log(ucEverySecondChr("abcdefg"))
console.log(ucEverySecondChr("TONYmontana"));
[after comment] Immutability and function scope
/**
 * Appends the given strings, one per line, as HTML to the #result element.
 *
 * @param {...string} strs - HTML fragments to print.
 * @returns {undefined} whatever insertAdjacentHTML returns.
 */
let log = (...strs) => {
  const target = document.querySelector(`#result`);
  const markup = strs.join(`\n`) + `\n`;
  return target.insertAdjacentHTML(`beforeend`, markup);
};
// Demo script: shows that operations on a primitive string never change the
// value held by the variable unless the result is assigned back to it.
log(`<h3>About immutability of strings</h3>`);
let str = `lt4`;
// None of the next three statements stores its result, so [str] keeps its value.
// NOTE(review): the index assignment is silently ignored in sloppy mode only;
// in strict mode / ES modules it throws a TypeError - confirm how this is run.
str[2] = `X`;
str.toUpperCase();
str.concat(`You won't see me here`);
// Print the statements above together with the (unchanged) value of [str].
log(`<code>let str = \`lt4\`;`,
`<span class="comment">Whatever you do to me, I (str) `+
`won't budge ...</span>`,
`str[2] = \`X\`;`,
`str.toUpperCase();`,
`str.concat(\`You won't see me here\`);</code>`,
`[str] is immutable, so it is not changed. It's value is <b>${str}</b>`);
// changeStr returns a new string; the outer [str] is untouched until reassigned.
log(``,`---`, `[str] passed to the function exists ` +
`only within the called function scope. ` +
`The original value does not change, so the ` +
`<i>return value</i> of <code class="inline">changeStr(str);</code> is <b>${
changeStr(str)}</b>, but the <i>value of [str]</i> is still => <b>${
str}</b>`);
// Reassigning is what actually changes the value bound to [str].
str = changeStr(str);
log(``,`---`,
`<code>str = changeStr(str);`,
`<span class="comment">Hey, I (str) suddenly feel different</span></code>`,
`Now you changed the original [str] by (re)assigning the return ` +
`value of <code class="inline">changeStr(str)</code>. So ` +
`the value of [str] is now <b>${str}</b>`);
// NOTE(review): `+=` is itself an assignment (strObj = strObj + ...), so after
// this line strObj holds a new primitive string rather than a mutated String
// object - the printed explanation below may be misleading; confirm.
let strObj = new String(`Hello`);
strObj += ` world`;
log(``, `---`,
`<code>let strObj = new String(\`Hello\`);`,
`<span class="comment">I (strObj) am incomplete, change me!</span>`,
`strObj += \` world\`;</code>`,
`To be complete: ` +
`if your string variable is a <i>real instance</i> of `+
`<code class="inline">String</code> ` +
`you can change it without (re)assigning, `+
`so the value of [strObj] is now <b>${strObj}</b>`);
/**
 * Returns `str` with " => there we are!" appended when it is shorter than
 * four characters; longer values are returned unchanged. The caller's
 * variable is never modified.
 *
 * @param {string} str - input value.
 * @returns {string} the (possibly extended) string.
 */
function changeStr(str) {
  const isShort = str.length < 4;
  return isShort ? str + " => there we are!" : str;
}
body {
font: normal 12px/15px verdana, arial;
margin: 2em;
}
/* Output area of the demo: monospaced, keeps the line breaks written by log(). */
#result {
  /* was "font-familiy: monospace3" - an unknown property and family, so it was ignored */
  font-family: monospace;
  white-space: pre-wrap;
  max-width: 75vw;
}
code {
color: green;
font-family: 'Courier New';
background-color: #EEE;
display: block;
padding: 2px;
max-width: 70vw;
}
.comment {
color: #777;
}
.comment:before {
content: '// ';
}
code.inline {
display: inline-block;
}
<div id="result"></div>
Related
Suppose there is string wrapped with two * characters (from both starting and ending). The resulting string should be converted in bold text, similarly as when the string is wrapped with two characters _, which should produce an italic string.
My code in React is the following:
import * as React from 'react';
// Component with a text input; tries to render text wrapped in `*` (bold) or
// `_` (italic) control characters as the corresponding HTML element.
export default function App() {
// boldText: raw value of the input; res: DOM element built from the converted text.
const [boldText, setBoldText] = React.useState('' as any);
const [res, setRes] = React.useState('' as any);
// Characters treated as formatting control characters.
let speChar: any = '*_~`';
// Control-character sequence -> opening tag(s).
let openingTagsList: any = {
'*': '<b>',
_: '<i>',
'*_': '<b><i>',
'_*': '<b><i>',
};
// Control-character sequence -> closing tag(s).
// NOTE(review): for the two-character keys the closing order `</b></i>` does
// not mirror the opening order `<b><i>` - confirm whether that is intended.
let closingTagsList: any = {
'*': '</b>',
_: '</i>',
'*_': '</b></i>',
'_*': '</b></i>',
};
let openingTagsListKeys = Object?.keys(openingTagsList);
let closingTagsListKeys = Object?.keys(closingTagsList);
// Input change handler: detects control characters and converts the text.
function strFont(e) {
let str = e.target.value;
// Collect each distinct control character that occurs in the input,
// in order of first appearance (e.g. "*" or "*_").
let matchedSplChar = '';
for (let i = 0; i < str.length; i++) {
if (matchedSplChar.indexOf(str[i]) === -1) {
if (speChar.indexOf(str[i]) !== -1) matchedSplChar += str[i];
}
}
if (matchedSplChar as any) {
// FL / SL: the character found at the first occurrence of the collected
// sequence when searching from index 0 and from index 1 respectively
// (undefined when indexOf returns -1).
let FL = str[str.indexOf(matchedSplChar, 0)];
let SL = str[str.indexOf(matchedSplChar, 1)];
let startingTags;
let closingTags;
// Look up the tags for the collected sequence (for...in yields array indexes here).
for (let key in openingTagsListKeys) {
if (matchedSplChar === openingTagsListKeys[key])
startingTags = openingTagsList[matchedSplChar];
}
for (let key in closingTagsListKeys) {
if (matchedSplChar === closingTagsListKeys[key])
closingTags = closingTagsList[matchedSplChar];
}
if (FL && SL && FL == SL) {
// String.prototype.replace with a string pattern replaces only the FIRST
// occurrence, so just one opening and one closing control character are
// converted; any further `*...*` pairs in the input stay untouched.
let replaceTags = str
.replace(FL, startingTags)
.replace(SL, closingTags);
// Parse the markup through a detached <div> and keep its first child node.
let divTag = document.createElement('div');
divTag.innerHTML = replaceTags;
let htmlObj: any = divTag.firstChild;
// Only the first child is stored, and only when it has innerHTML
// (a leading text node has none).
if (htmlObj.innerHTML) setRes(htmlObj);
setBoldText(e.target.value);
} else {
setBoldText(e.target.value);
}
} else {
setBoldText(e.target.value);
}
}
return (
<div>
<input type="text" value={boldText || ''} onChange={(e) => strFont(e)} />
{res ? <res.tagName>{res?.innerHTML}</res.tagName> : ''}
<TextFormation />
</div>
);
}
, gives the output:
, instead of both strings being bold. How can I achieve it then?
From the above comments ...
"#KavyaPathak ... which effectively means that anything in between two * characters is going to be wrapped into , like e.g. ... foo *bar* *baz* *foo ... becoming ... foo <b>bar</b> <b> </b> <b>baz</b> <b> </b> *foo ... which renders ... "foo bar __ baz __ *foo"." – Peter Seliger
"#PeterSeliger yes" – Kavya Pathak
In case the OP's confirmation remains, the commented link to a regex and replace based approach already represents one possible solution.
Both regular expressions ...
/\*([^*]+)(?=\*)/g
/_([^_]+)(?=_)/g
... follow the same pattern.
match a control character (either * or _) ... \* respectively _ ..,
match and capture any character sequence which does not contain such a control character ... ([^*]+) respectively ([^_]+) ..,
until a positive lookahead ... (?=\*) respectively (?=_) ... confirms the presence of the next such control character (which excludes this very character from the entire match).
/**
 * Converts pseudo markdown into HTML markup: text following a `*` up to the
 * next `*` is wrapped in <b>, text following a `_` up to the next `_` is
 * wrapped in <i>, line breaks become <br/> and runs of whitespace collapse
 * into a single space.
 *
 * The closing control character is only looked ahead at, not consumed, so it
 * also acts as the opening character of the next match.
 *
 * @param {string} value - pseudo markdown text.
 * @returns {string} trimmed HTML markup.
 */
function getMarkupFromPseudoMarkdown(value) {
  const boldPattern = /\*([^*]+)(?=\*)/g;
  const italicPattern = /_([^_]+)(?=_)/g;

  const withBold = value.replace(boldPattern, '<b>$1</b> ');
  const withItalic = withBold.replace(italicPattern, '<i>$1</i> ');
  const withBreaks = withItalic.replace(/\n/g, '<br\/>');

  return withBreaks.replace(/\s+/g, ' ').trim();
}
/**
 * Input handler: converts the current value of the event's target and shows
 * the result twice - as source text in `code pre` and rendered in `output`.
 *
 * @param {Event} event - event whose `currentTarget` exposes a `value`.
 */
function displayCreatedMarkup(event) {
  const source = event.currentTarget;
  const html = getMarkupFromPseudoMarkdown(source.value);

  const sourceView = document.querySelector('code pre');
  sourceView.textContent = html;

  const renderedView = document.querySelector('output');
  renderedView.innerHTML = html;
}
document
.querySelector('textarea')
.addEventListener('input', displayCreatedMarkup)
textarea, output { width: 49%; }
output { float: right; font-size: 87%; margin-top: 2px; }
code pre { background-color: #eee; white-space: break-spaces; word-break: break-all;}
<textarea cols="32" rows="8" placeholder="... put pseudo markdown here ..."></textarea>
<output></output>
<code><pre></pre></code>
And in case the OP figures that the above approach does not solve the OP's problem especially not for some bold/italic edge cases, then the OP might consider a mainly split and reduce based approach which handles such edge cases by looking up the previous (matchList[idx - 1]) and the next (matchList[idx + 1]) (control) character of a matching (neither * nor _) token.
/**
 * Converts pseudo markdown into HTML markup with a split based approach:
 * the text is split at every `*` (then at every `_`), and a token is wrapped
 * in <b> (respectively <i>) only when it is directly enclosed by the control
 * character on both sides. Afterwards line breaks become <br/> and runs of
 * whitespace collapse into a single space.
 *
 * @param {string} value - pseudo markdown text.
 * @returns {string} trimmed HTML markup.
 */
function getMarkupFromPseudoMarkdown(value) {
  // Splits `text`, drops the control characters and wraps enclosed tokens.
  const wrapEnclosed = (text, splitter, marker, tagName) => {
    const tokens = text.split(splitter);
    // The first token is never a control character; it seeds the result.
    let result = tokens[0];
    for (let pos = 1; pos < tokens.length; pos += 1) {
      const token = tokens[pos];
      if (token === marker) {
        continue;
      }
      const isEnclosed = tokens[pos - 1] === marker && tokens[pos + 1] === marker;
      result = isEnclosed
        ? `${ result } <${ tagName }>${ token }</${ tagName }> `
        : `${ result }${ token }`;
    }
    return result;
  };

  const withBold = wrapEnclosed(value, /(\*)/, '*', 'b');
  const withItalic = wrapEnclosed(withBold, /(_)/, '_', 'i');

  return withItalic
    .replace(/\n/g, '<br\/>')
    .replace(/\s+/g, ' ')
    .trim();
}
/**
 * Input handler: converts the current value of the event's target and shows
 * the result twice - as source text in `code pre` and rendered in `output`.
 *
 * @param {Event} event - event whose `currentTarget` exposes a `value`.
 */
function displayCreatedMarkup(event) {
  const source = event.currentTarget;
  const html = getMarkupFromPseudoMarkdown(source.value);

  const sourceView = document.querySelector('code pre');
  sourceView.textContent = html;

  const renderedView = document.querySelector('output');
  renderedView.innerHTML = html;
}
document
.querySelector('textarea')
.addEventListener('input', displayCreatedMarkup)
textarea, output { width: 49%; }
output { float: right; font-size: 87%; margin-top: 2px; }
code pre { background-color: #eee; white-space: break-spaces; word-break: break-all;}
<textarea cols="32" rows="8" placeholder="... put pseudo markdown here ..."></textarea>
<output></output>
<code><pre></pre></code>
I try to make change of any text by char by char (show text by char, delete text by char and show another one char by char).
What I actually have?
// Question code (not working): every 2500 ms it is meant to erase the content
// of #el one character at a time and then move on to the next term.
var i = 0;
var terms = ['text <b>bold</b>', 'longer text <b>bold</b>', '<b>bold</b> text 3'];
var timer = setInterval(function() {
var el = $('#el');
var wr = $('#wr');
// Inner timer removes the last character of the element's HTML on each tick.
// It is never cleared, and a new one is started on every outer tick.
setInterval(function() {
var str = el.html(); // doesn't work (still shows all content, not sliced one)
el.html(str.substring(0, str.length - 1));
// NOTE: `str` below is declared inside the callback above, so it is not in
// scope where the delay is computed.
}, 300 / str.length); // (300 / str.length) - do all animation in 300s
// Advance the term index, wrapping around after the third term.
i++;
if (i === 3) {
i = 0;
}
}, 2500);
I have problem with slicing last char, so I don't get to adding new text so far :-(
One of variants I tried:
...
var text = terms[i].split('');
setInterval(function() {
el.html(text); // add sliced text in loop... not working as expected
// ...
text = text.slice(0, -1); // slice text by last character
}, 300 / text.length);
Okay, due to the comments a little bit explanation
I have an element
<span id=el>text <b>bold</b></span>
In 300ms interval I need to remove this text char by char.
<span id=el>text <b>bold</b></span>
<span id=el>text <b>bol</b></span>
<span id=el>text <b>bo</b></span>
<span id=el>text <b>b</b></span>
<span id=el>text <b></b></span> // remove 'b'
<span id=el>text</span> // remove ' ' and empty bold
<span id=el>tex</span>
<span id=el>te</span>
<span id=el>t</span>
<span id=el></span>
// now element is empty, since start it's 300ms
// and now I need to put there new text, char by char (whole phrase 300ms again)
<span id=el>l</span>
<span id=el>lo</span>
<span id=el>lon</span>
...
<span id=el>longer tex</span>
<span id=el>longer text</span>
<span id=el>longer text </span> // add space
<span id=el>longer text <b>b</b></span> // add 'b' into bold
<span id=el>longer text <b>bo</b></span>
<span id=el>longer text <b>bol</b></span>
<span id=el>longer text <b>bold</b></span>
// after 2500ms remove this char by char again and replace by third. Etc.
Etc. Can you help me with that, please? I have tried for the last 2 days, with many attempts and no result...
Thanks
This is how I would organize my code to shrink and grow an element. The only sensible way I can do this is to first replace < and > by the corresponding entity codes &lt; and &gt; so that these characters are not interpreted as actual tags. These 4-letter entity codes will be removed and added as a single unit. In this way you can shrink the string one quasi-character at a time from right to left and still have valid HTML at all times.
The Promise API (well, actually jQuery's $.Deferred version of it) is used to know in a deterministic fashion when the shrink-grow cycle, which is an asynchronous process, has completed, so that the 2500 ms delay (another asynchronous process) can then be started before beginning anew.
$(function() {
/**
 * Shows `term` in #el, erases it one unit at a time and then types it back,
 * calling `resolve` once the text is complete again.
 *
 * The angle brackets of `term` are first replaced by the entity codes
 * `&lt;` / `&gt;`, so tags are displayed as text and the element content is
 * valid HTML at every step. Each 4-character entity is removed and added as
 * a single unit.
 *
 * @param {Function} resolve - called with `undefined` when the cycle is done.
 * @param {string} term - text (may contain tags) to animate.
 */
function shrink_grow(resolve, term)
{
  var ENTITY_LENGTH = 4; // length of "&lt;" and "&gt;"

  function isBracketEntity(text)
  {
    return text === '&lt;' || text === '&gt;';
  }

  // Escape the brackets; without this the "4 characters as one unit" logic
  // below can never apply.
  term = term.replace(/</g, '&lt;').replace(/>/g, '&gt;');
  let el = $('#el');
  el.html(term);
  let interval = setInterval(shrinker, 30);

  // Removes the last character (or a whole trailing entity) on every tick.
  function shrinker()
  {
    let str = el.html();
    let n = isBracketEntity(str.slice(-ENTITY_LENGTH)) ? ENTITY_LENGTH : 1;
    el.html(str.slice(0, str.length - n));
    // `str` is the content before this tick's removal, so the switch to
    // growing happens on the tick after the element became empty.
    if (str.length === 0) {
      clearInterval(interval);
      interval = setInterval(grower, 30);
    }
  }

  // Adds the next character (or a whole entity) of `term` on every tick.
  function grower()
  {
    let str = el.html();
    if (str.length == term.length) {
      clearInterval(interval);
      resolve(undefined); // we are done
      return;
    }
    // Look at the text that comes next, starting exactly where the displayed
    // text ends, so an entity is never cut in the middle.
    let upcoming = term.slice(str.length, str.length + ENTITY_LENGTH);
    let step = isBracketEntity(upcoming) ? ENTITY_LENGTH : 1;
    el.html(term.slice(0, str.length + step));
  }
}
/**
 * Returns a jQuery promise that is resolved after the given delay.
 *
 * @param {number} milliseconds - delay before the promise resolves.
 * @returns {object} promise of a jQuery Deferred.
 */
function pause(milliseconds)
{
  var timerDone = $.Deferred();
  // The timer callback is the Deferred's own resolve function.
  setTimeout(timerDone.resolve, milliseconds);
  return timerDone.promise();
}
// Terms to animate, one after the other, with a 2500 ms pause in between.
let terms = ['text <b>bold</b>', 'longer text <i>italic</i> text', '<b>bold</b> text 3'];
let term_number = 0;

// Runs the shrink-grow cycle for the current term, then either pauses and
// continues with the next term or reports completion after the last one.
(function runNextTerm() {
  let deferred = $.Deferred();
  let promise = deferred.promise();
  shrink_grow(deferred.resolve, terms[term_number++]);
  promise.then(function() {
    if (term_number < terms.length) {
      pause(2500).then(function() {
        runNextTerm();
      });
    } else {
      console.log('done');
    }
  });
})();
});
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<span id="el"></span>
And Keeping the tags intact
This is very complicated, however:
$(function() {
let TOTAL_TIME = 300;
// Types `term` into #el character by character and then erases it again,
// keeping an inner tag pair such as <b>..</b> intact while its text grows or
// shrinks. Calls `resolve` once the element has been emptied.
function shrink_grow(resolve, term)
{
let el = $('#el');
// NOTE(review): the `+` sits outside the capture group, so group 1 holds only
// the LAST character of the tag name; the \1 back-reference therefore only
// works for single-letter tags such as <b> and <i> - confirm before using
// longer tag names.
let matches = term.match(/<([^>])+>(.*?)<\/\1>/); // look for internal tag
let internalTagTextLength = matches ? matches[2].length : 0;
let internalTagText = internalTagTextLength ? matches[2] : '';
// Number of characters excluding the tag markup: "<x>" + "</x>" is
// twice the tag name plus the 5 characters "<", ">", "<", "/", ">".
let strlen = term.length;
if (matches) {
strlen -= matches[1].length * 2 + 5;
}
// Spread TOTAL_TIME over the characters, but never tick faster than 16 ms.
let shrinkGrowInterval = TOTAL_TIME / strlen;
if (shrinkGrowInterval < 16) {
shrinkGrowInterval = 16;
}
// The cycle starts with growing; grower() later switches to shrinker().
let interval = setInterval(grower, shrinkGrowInterval);
// Removes one character per tick from the end of the element's HTML.
function shrinker()
{
let str = el.html();
let matches = str.match(/<([^>])+>(.*?)<\/\1>$/); // <i>text</i> at end of string, for example
if (matches) {
let str2 = matches[2];
if (str2.length < 2) { // get rid of entire tag
str2 = matches[0];
let n = str2.length;
let l = str.length - n;
el.html(str.substr(0, l));
if (l === 0) {
clearInterval(interval);
resolve(undefined); // we are done
}
}
else {
// Drop the last character of the tag's text but keep the tag itself.
let str2a = str2.substr(0, str2.length - 1);
str = str.replace(/<([^>])+>(.*?)<\/\1>$/, '<' + matches[1] + '>' + str2a + '</' + matches[1] + '>');
el.html(str);
}
}
else {
// No tag at the end: remove a plain character.
el.html(str.substr(0, str.length - 1));
// `str` is the content before this removal, so completion is detected on
// the tick after the element became empty.
if (str.length === 0) {
clearInterval(interval);
resolve(undefined); // we are done
}
}
}
// Adds one character per tick until the element's HTML is as long as `term`.
function grower()
{
let str = el.html();
if (str.length == term.length) {
// Fully typed: switch over to erasing.
clearInterval(interval);
interval = setInterval(shrinker, shrinkGrowInterval);
}
else {
let matches = term.substr(str.length).match(/^<([^>])+>(.*?)<\/\1>/); // start of <i>text</i>, for example?
if (matches) {
// The tag starts here: insert the tag pair with the first character of its text.
let str2 = '<' + matches[1] + '>' + matches[2].substr(0, 1) + '</' + matches[1] + '>';
el.html(str + str2);
}
else {
let matches = str.match(/<([^>])+>(.*?)<\/\1>$/); // <i>text</i> at end of string, for example
if (matches) {
let str2 = matches[2];
let l = str2.length;
if (l == internalTagTextLength) {
// The tag's text is complete: continue with the text after the tag.
el.html(term.substr(0, str.length + 1));
}
else {
// Extend the text inside the tag by one character.
let str2a = internalTagText.substr(0, l + 1);
str = str.replace(/<([^>])+>(.*?)<\/\1>$/, '<' + matches[1] + '>' + str2a + '</' + matches[1] + '>');
el.html(str);
}
}
else {
// Plain text: add the next character of `term`.
el.html(term.substr(0, str.length + 1));
}
}
}
}
}
let terms = ['text <b>bold</b>', 'longer text <i>italic</i> text', '<b>bold</b> text 3'];
let nTerms = terms.length;
let termNumber = -1;
// Advances to the next term (wrapping around after the last one), runs its
// grow/shrink cycle and schedules itself again once that cycle has finished,
// so the animation loops over the terms indefinitely.
function callShrinkGrow()
{
  termNumber += 1;
  if (termNumber >= nTerms) {
    termNumber = 0;
  }
  let cycleDone = $.Deferred();
  let whenDone = cycleDone.promise();
  shrink_grow(cycleDone.resolve, terms[termNumber]);
  whenDone.then(callShrinkGrow);
}
callShrinkGrow();
});
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<body>
<span id="el"></span>
Okay according to my understanding, you want to show string/text char by char. So I think this will help you.
// Types `str` into #charP one character at a time, 100 ms per character.
var str = "Hello World";
var c = ""; // text shown so far
var i = 0;  // number of characters consumed
(function loop(){
  i += 1;
  if (i > str.length) {
    return; // every character has been appended
  }
  c += str[i - 1];
  setTimeout(function(){
    $("#charP").text(c);
    loop();
  }, 100);
})();
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<div>
<p id="charP"></p>
</div>
I'm trying to pretty print my json data in html, and do some syntactic coloration.
But I'm having a little issue with empty values (empty list, empty string) in my code.
Here is the code :
if (!library)
   var library = {};

/**
 * Tells whether `value` represents an integer.
 *
 * @param {*} value - value (typically a string token) to test.
 * @returns {boolean} true when `value` is numeric and parses to an integer.
 */
function isInt(value) {
   return !isNaN(value) && (function(x) { return (x | 0) === x; })(parseFloat(value));
}

library.json = {
   /**
    * Replacement callback for one line of pretty-printed JSON; wraps the key
    * and the value of the line in <span> elements for colouring.
    *
    * @param {string} match - the whole matched line (unused).
    * @param {string} pIndent - leading spaces of the line.
    * @param {string} pKey - `"key": ` part, if any.
    * @param {string} pVal - value part, if any.
    * @param {string} pEnd - trailing `,`, `[` or `{`, if any.
    * @returns {string} the line with markup added.
    */
   replacer: function(match, pIndent, pKey, pVal, pEnd) {
      var int = '<span class=json-int>';
      var key = '<span class=json-key>';
      var str = '<span class=json-string>';
      var r = pIndent || '';
      if (pKey) {
         r = r + key + pKey.replace(/[": ]/g, '') + '</span>: ';
      }
      if (pVal) {
         r = r + (isInt(pVal) ? int : str) + pVal + '</span>';
      }
      return r + (pEnd || '');
   },
   /**
    * Serialises `obj` with 3-space indentation, HTML-escapes the result and
    * adds colouring markup line by line.
    *
    * Lines that do not fit the line pattern (for example an empty list `[]`
    * or empty object `{}` value, or a closing bracket) are left unchanged.
    *
    * @param {*} obj - value to serialise.
    * @returns {string} HTML markup.
    */
   prettyPrint: function(obj) {
      var jsonLine = /^( *)("[\w]+": )?("[^"]*"|[\w.+-]*)?([,[{])?$/mg;
      return JSON.stringify(obj, null, 3)
         // Escape HTML special characters; `&` must go first so the entities
         // inserted afterwards are not escaped a second time. Escaped quotes
         // become &quot; so they do not end the string match in jsonLine.
         .replace(/&/g, '&amp;').replace(/\\"/g, '&quot;')
         .replace(/</g, '&lt;').replace(/>/g, '&gt;')
         .replace(jsonLine, library.json.replacer);
   }
};
var lint = {
"LintResult": "FAILED",
"CFN_NAG": [
{
"filename": "sam.yaml",
"file_results": {
"failure_count": 0,
"violations": []
}
}
],
"Comment": "If above CFN_NAG key has None value, check code execution log for errors/exceptions"
}
$('#lint').html(library.json.prettyPrint(lint));
//document.getElementById("json").innerHTML = JSON.stringify(data, undefined, 2);
/* Box that holds the pretty-printed JSON. */
pre {
  background-color: ghostwhite;
  /* was "bovrder" - an unknown property, so no border was drawn */
  border: 1px solid silver;
  padding: 10px 20px;
  margin: 20px;
}
.json-key {
color: brown;
}
.json-value {
color: navy;
}
.json-string {
color: olive;
}
.json-int {
color: fuchsia;
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<div style="background-color:lightblue">
<h1>JSON Data:</h1>
<pre id="lint"></pre>
</div>
<p>A JSON string with 12 spaces per indentation.</p>
In the above code, the lint json variable has an empty list value for the violations item, and then this key is not print with the right color, it's just not processed.
I tried different way but I don't understand what is wrong.
You can try the code your self and will notice that the syntactic coloration doesn't work for this last item.
This might help you out:
/**
 * Appends a new <pre> element to the document body and fills it with `inp`.
 *
 * @param {string} inp - HTML markup to display.
 */
function output(inp) {
  var block = document.createElement('pre');
  document.body.appendChild(block);
  block.innerHTML = inp;
}
/**
 * Adds syntax-colouring markup to a JSON string.
 *
 * The text is HTML-escaped first; then every key, string, number, boolean
 * and null token is wrapped in a <span> whose class names the token type
 * ("key", "string", "number", "boolean" or "null"). Brackets, commas and
 * whitespace are left as they are.
 *
 * @param {string} json - JSON text, e.g. the result of JSON.stringify.
 * @returns {string} HTML markup.
 */
function syntaxHighlight(json) {
  // `&` must be escaped first so the entities added afterwards stay intact.
  const escaped = json
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
  const tokenPattern = /("(\\u[a-zA-Z0-9]{4}|\\[^u]|[^\\"])*"(\s*:)?|\b(true|false|null)\b|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?)/g;

  return escaped.replace(tokenPattern, function (match) {
    let cls = 'number';
    if (/^"/.test(match)) {
      // A quoted token followed by a colon is a key, otherwise a string value.
      cls = /:$/.test(match) ? 'key' : 'string';
    } else if (/true|false/.test(match)) {
      cls = 'boolean';
    } else if (/null/.test(match)) {
      cls = 'null';
    }
    return '<span class="' + cls + '">' + match + '</span>';
  });
}
var obj = {
"LintResult": "FAILED",
"CFN_NAG": [
{
"filename": "sam.yaml",
"file_results": {
"failure_count": 0,
"violations": []
}
}
],
"Comment": "If above CFN_NAG key has None value, check code execution log for errors/exceptions"
};
var str = JSON.stringify(obj, undefined, 4);
output(syntaxHighlight(str));
pre {outline: 1px solid #ccc; padding: 5px; margin: 5px; background: ghostwhite }
.string { color: olive; }
.number { color: fuchsia; }
.boolean { color: navy; }
.null { color: magenta; }
.key { color: brown; }
Hello, I don't know if my title is helpful at all, but here is my problem: I want to make a typewriter effect in JS, CSS and HTML. Everything works fine apart from adding a new line of text; when I try to add a new line, it does not show.
// Question code: typewriter effect that reveals `str` in #typewriter one
// character at a time, without pausing on the characters of an HTML tag.
var str = "<p>I want to put text here then another line under this one</p>",
// NOTE(review): in a classic (non-module) script `<!--` starts a single-line
// comment, so the next line is presumably ignored by the engine and the
// declaration list simply continues with `i = 0` - confirm.
<!--var str = "<p>text here</p>",--> <!--This is what I tried to do to add a new line-->
i = 0,
isTag,
text;
(function type() {
// Take one more character than on the previous call.
text = str.slice(0, ++i);
// Stops as soon as the slice equals the whole string, i.e. before the
// complete text is written to the element.
if (text === str) return;
document.getElementById('typewriter').innerHTML = text;
// Track whether the character just added opened or closed a tag.
var char = text.slice(-1);
if( char === '<' ) isTag = true;
if( char === '>' ) isTag = false;
// Inside a tag: continue immediately instead of waiting for the timer.
if (isTag) return type();
setTimeout(type, 80);
}());
#typewriter {
color: lime;
text-align: center;
}
<div id="typewriter"></div>
// Typewriter effect: appends the characters of `str` to the #output element
// one by one, every 80 ms. The line break in `str` is an ordinary character.
var str = "My text\nSome more text";
var stra = str.split(""); // queue of characters still to be typed
var tw = document.getElementById("output");

// Types the next queued character and schedules the following one.
function type(){
  var char = stra.shift();
  if (!char) {
    return; // queue is empty: nothing left to type
  }
  tw.innerHTML += char;
  setTimeout(type, 80);
}

type();
<pre id="output"></pre>
use <br />
var str = "<p>I want to put text here<br /> then another line under this one</p>";
Another possibility is to group the paragraph's lines using span elements and set the display style property of each span to block.
// Typewriter effect that reveals `str` in #typewriter character by character
// (80 ms per visible character) while skipping over HTML tags instantly.
window.onload = function () {
  var str = "<p><span>I want to put text here then another line under this one</span><span>text here</span></p>";

  // isInTagArg: whether the position is inside a tag; indexArg: position in `str`.
  (function type(isInTagArg, indexArg) {
    var position = indexArg || 0;
    if (position >= str.length) {
      return;
    }
    var insideTag = isInTagArg || false;
    var current = str.charAt(position);

    if (insideTag) {
      // Keep skipping until the closing '>' of the tag has been passed.
      return type(current != '>', position + 1);
    }
    if (current == '<') {
      // A tag starts here: skip it without waiting.
      return type(true, position + 1);
    }

    // Visible character: show everything up to and including it, then wait.
    document.getElementById('typewriter').innerHTML = str.substr(0, position + 1);
    setTimeout(function() {type(false, position + 1)}, 80);
  }());
}
#typewriter {
color: lime;
text-align: center;
}
#typewriter span
{
display: block;
}
<div id="typewriter"></div>
To give you an idea of what I need, I have been using the below code to parse content within tags and wrap each sentence within tags so I can then interact with sentences on a page.
// Wraps every sentence of each <p> in <span class="sentence">. The content is
// read with .text(), so only the paragraph's text is used as input.
$('p').each(function() {
  var paragraph = $(this);
  var sentencePattern = /(((?![.!?]['"]?\s).)*[.!?]['"]?)(\s|$)/g;
  var wrapped = paragraph
    .text()
    .replace(sentencePattern, '<span class="sentence">$1</span>$3');
  paragraph.html(wrapped);
});
However, the following line demonstrates my problem:
<p>This is a link and it is removed with the above code! Here is another sentence.</p>
Nested tags such as <a>, <img> etc...within <p> tags that I'm searching through are removed with the code that I'm using. I need to keep these tags intact, so the content stays the same within the <p> tags.
I need:
<p><span class="sentence">This is a link and it is removed with the above code!</sentence><sentence>Here is another sentence.</sentence></p>
After reading this barn-burner about parsing HTML with regex, I've concluded that I need to use a combo of an HTML parser of some sort to traverse through sub-tags within a <p> tag, and then use a regex to find the sentences. I think the regex I have listed above should work for most of my uses, if that helps.
So: how should I do it?
It is really difficult to tokenise language reliably into sentences, and that is without the added complexity of throwing HTML into the equation. There are some applications out there that attempt to deal with Natural Language Processing; an example would be the Stanford Tokenizer, which runs on Java (not JavaScript).
And as people keep mentioning, a regex is not the solution to this problem, language is not regular so don't expect a Regular Expression only solution.
There is a question here on SO, Basic NLP in CoffeeScript or JavaScript — Punkt tokenizaton, simple trained Bayes models — where to start? Which I think summarises things fairly simply for Javascript.
Anyway, to at least give you a little something that you could play with, I knocked up a little code for you. This works reasonable well until the markup/language begins to resemble anything slightly complex or different, but ultimately fails the mark by a long way. But, it may be enough for what you need, I don't know.
CSS
.emphasis {
font-style: italic;
}
.bold {
font-weight: bold;
}
.emphasis.bold {
font-style: italic;
font-weight: bold;
}
.unidentified {
background-color: pink;
}
.sentence0 {
background-color: yellow;
}
.sentence1 {
background-color: green;
}
.sentence2 {
background-color: red;
}
.whitespace {
white-space: pre;
background-color: blue;
}
Javascript
/*jslint maxerr: 50, indent: 4, browser: true */
(function () {
"use strict";
// Patterns and shared state used by the sentence-guessing helpers below.
// rxOpen / rxClose: an opening tag / a closing tag.
// rxWhitespace: whitespace at the start of the text; the quantifier is lazy,
// so a match is exactly one whitespace character.
// rxSupStart / rxSupEnd: a <sup ...> tag at the start of the text / a </sup> tag.
// color: running counter used to alternate the sentence highlight class.
var rxOpen = new RegExp("<[^\\/].+?>"),
rxClose = new RegExp("<\\/.+?>"),
rxWhitespace = new RegExp("^\\s+?"),
rxSupStart = new RegExp("^<sup\\b[^>]*>"),
rxSupEnd = new RegExp("<\/sup>"),
sentenceEnd = [],
color = 0,
rxIndex;
// Parts of the sentence-end pattern, concatenated into rxIndex below:
// a non-digit followed by one or more of . ! ?
sentenceEnd.push(new RegExp("[^\\d][\\.!\\?]+"));
// ... with an even number of double quotes in the rest of the text
// (presumably: the terminator is not inside a quoted passage)
sentenceEnd.push(new RegExp("(?=([^\\\"]*\\\"[^\\\"]*\\\")*[^\\\"]*?$)"));
// ... and no closing ), ], } or | ahead without its opening counterpart
// (presumably: the terminator is not inside such a bracketed passage)
sentenceEnd.push(new RegExp("(?![^\\(]*?\\))"));
sentenceEnd.push(new RegExp("(?![^\\[]*?\\])"));
sentenceEnd.push(new RegExp("(?![^\\{]*?\\})"));
sentenceEnd.push(new RegExp("(?![^\\|]*?\\|)"));
//sentenceEnd.push(new RegExp("(?![^\\\\]*?\\\\)"));
//sentenceEnd.push(new RegExp("(?![^\\/.]*\\/)")); // all could be a problem, but this one is problematic
// Join the sources of all parts into one regular expression.
rxIndex = new RegExp(sentenceEnd.reduce(function (previousValue, currentValue) {
return previousValue + currentValue.source;
}, ""));
// Returns the index of the last character of the first sentence-end match
// in `html`, or -1 when there is no match.
function indexSentenceEnd(html) {
    var found = rxIndex.exec(html);
    if (found === null) {
        return -1;
    }
    return found.index + found[0].length - 1;
}
// Appends `string`, wrapped in a <span> with the given class, to `array`.
// For the class "sentence" the running counter is used to alternate between
// "sentence0" and "sentence1", and `classNameOpt` (if given) is added as a
// second class; for any other class name `classNameOpt` is ignored.
function pushSpan(array, className, string, classNameOpt) {
    var classes = className;
    if (className === "sentence") {
        classes = className + (color % 2);
        if (classNameOpt) {
            classes = classes + " " + classNameOpt;
        }
        color += 1;
    }
    array.push('<span class="' + classes + '">' + string + '</span>');
}
// If `html` starts with a <sup>...</sup> element, moves that element into the
// span pushed last (just before its closing </span>) and returns the rest of
// `html`. In every other case `html` is returned unchanged and `array` is
// left untouched:
//   - there is no previous span to attach to (empty `array`),
//   - `html` does not start with <sup>,
//   - the closing </sup> is missing (previously this dropped everything but
//     the last character of `html`).
function addSupToPrevious(html, array) {
    var closeIndex,
        consumed,
        previous;

    if (array.length === 0 || html.search(rxSupStart) === -1) {
        return html;
    }

    closeIndex = html.search(rxSupEnd);
    if (closeIndex === -1) {
        return html;
    }

    consumed = closeIndex + 6; // 6 === "</sup>".length
    previous = array.pop();
    // 7 === "</span>".length: insert the <sup> element before the closing tag.
    array.push(previous.slice(0, -7) + html.slice(0, consumed) + previous.slice(-7));

    return html.slice(consumed);
}
// If `html` starts with whitespace, pushes the matched whitespace as a
// "whitespace" span and returns the remainder; otherwise returns `html`.
function leadingWhitespaces(html, array) {
    var found = rxWhitespace.exec(html),
        width;

    if (found === null) {
        return html;
    }

    width = found[0].length;
    pushSpan(array, "whitespace", html.slice(0, width));
    return html.slice(width);
}
// When no sentence end is found in `html` (or it is reported at html.length),
// the whole text is pushed as one sentence and "" is returned; otherwise
// `html` is returned unchanged.
function paragraphIsSentence(html, array) {
    var endIndex = indexSentenceEnd(html);

    if (endIndex !== -1 && endIndex !== html.length) {
        return html;
    }

    pushSpan(array, "sentence", html, "paragraphIsSentence");
    return "";
}
// Handles text without any opening tag: pushes the text up to and including
// the first sentence end (or all of it when there is none) as a sentence and
// returns the remainder. Text containing an opening tag is returned unchanged.
function paragraphNoMarkup(html, array) {
    var cut;

    if (html.search(rxOpen) !== -1) {
        return html;
    }

    cut = indexSentenceEnd(html);
    if (cut === -1) {
        cut = html.length;
    }
    cut += 1; // include the terminating character

    pushSpan(array, "sentence", html.slice(0, cut), "paragraphNoMarkup");
    return html.slice(cut);
}
// Handles text with markup whose first sentence end lies before the first
// opening tag or after the first closing tag: pushes the text up to and
// including that sentence end as a sentence and returns the remainder.
// In all other cases `html` is returned unchanged.
function sentenceUncontained(html, array) {
    var open = html.search(rxOpen),
        close,
        cut;

    if (open === -1) {
        return html;
    }

    cut = indexSentenceEnd(html);
    if (cut === -1) {
        cut = html.length;
    }

    close = html.search(rxClose);
    if (cut >= open && cut <= close) {
        // The sentence end is within the open..close range: not handled here.
        return html;
    }

    cut += 1; // include the terminating character
    pushSpan(array, "sentence", html.slice(0, cut), "sentenceUncontained");
    return html.slice(cut);
}
// Handles text with markup whose first sentence end lies strictly between the
// first opening tag and the first closing tag: pushes everything up to and
// including that closing tag as a sentence and returns the remainder.
// In all other cases `html` is returned unchanged.
function sentenceContained(html, array) {
    var open = html.search(rxOpen),
        closing,
        close,
        cut,
        stop;

    if (open === -1) {
        return html;
    }

    cut = indexSentenceEnd(html);
    if (cut === -1) {
        cut = html.length;
    }

    closing = rxClose.exec(html);
    close = closing === null ? -1 : closing.index;
    if (cut <= open || cut >= close) {
        return html;
    }

    stop = close + closing[0].length; // position right after the closing tag
    pushSpan(array, "sentence", html.slice(0, stop), "sentenceContained");
    return html.slice(stop);
}
// Last resort: pushes all remaining text as one "sentence2" span and
// returns "" so that nothing is left to process.
function anythingElse(html, array) {
    var leftover = html;
    pushSpan(array, "sentence2", leftover, "anythingElse");
    return "";
}
// Rewrites the content of every <p> element as a sequence of highlighted
// spans. On each pass the handlers are tried in order and the first one that
// consumes some of the remaining text wins; this repeats until the text is
// used up or the safety counter (100 passes) runs out.
function guessSenetences() {
    var handlers = [
            addSupToPrevious,
            leadingWhitespaces,
            paragraphIsSentence,
            paragraphNoMarkup,
            sentenceUncontained,
            sentenceContained,
            anythingElse
        ],
        paragraphs = document.getElementsByTagName("p");

    Array.prototype.forEach.call(paragraphs, function (paragraph) {
        var html = paragraph.innerHTML,
            length = html.length,
            array = [],
            safety = 100,
            step;

        while (length && safety) {
            for (step = 0; step < handlers.length; step += 1) {
                html = handlers[step](html, array);
                if (html.length !== length) {
                    break; // this handler consumed text: start the next pass
                }
            }

            length = html.length;
            safety -= 1;
        }

        paragraph.innerHTML = array.join("");
    });
}
guessSenetences();
}());
On jsfiddle
you need to use .html() instead of .text() if you want to keep tags intact.
Check below code and let me know if it doesn't work out.
DEMO
// Wraps every sentence of each <p> in <span class="sentence">. The content is
// read with .html(), so nested tags are part of the processed markup.
$('p').each(function() {
  var paragraph = $(this);
  var sentencePattern = /(((?![.!?]['"]?\s).)*[.!?]['"]?)(\s|$)/g;
  var wrapped = paragraph
    .html()
    .replace(sentencePattern, '<span class="sentence">$1</span>$3');
  paragraph.html(wrapped);
});