How to add a new line on a script - javascript

Hello, I don't know if my title is helpful at all, but here is my problem: I want to make a typewriter effect in JS, CSS and HTML. Everything works fine apart from adding a new line of text; when I try to add a new line, it does not show.
// Typewriter effect: writes a growing prefix of `str` into #typewriter, one character per step.
var str = "<p>I want to put text here then another line under this one</p>",
<!--var str = "<p>text here</p>",--> <!--This is what I tried to do to add a new line-->
i = 0,
isTag,
text;
(function type() {
// Take one more character of `str` on every call.
text = str.slice(0, ++i);
// Stop as soon as the prefix equals the whole string (this return happens before the write below).
if (text === str) return;
document.getElementById('typewriter').innerHTML = text;
var char = text.slice(-1);
// Track whether the character just added lies inside an HTML tag.
if( char === '<' ) isTag = true;
if( char === '>' ) isTag = false;
// Inside a tag: continue immediately instead of waiting for the timer.
if (isTag) return type();
setTimeout(type, 80);
}());
#typewriter {
color: lime;
text-align: center;
}
<div id="typewriter"></div>

var str = "My text\nSome more text";
var stra = str.split("");
var tw = document.getElementById("output");
/**
 * Appends the next queued character from `stra` to the output element `tw`
 * and schedules itself again after 80 ms. Stops when the queue yields a
 * falsy value (i.e. it is empty).
 */
function type() {
  const next = stra.shift();
  if (!next) {
    return;
  }
  tw.innerHTML = tw.innerHTML + next;
  setTimeout(type, 80);
}
type();
<pre id="output"></pre>

use <br />
var str = "<p>I want to put text here<br /> then another line under this one</p>";

Another possibility is to group the lines of the paragraph in span elements and set the display style property of each span to block.
// Typewriter effect that runs once the page has loaded: reveals `str` inside
// #typewriter one visible character every 80 ms, stepping over markup
// (everything from '<' up to the matching '>') without any delay.
window.onload = function () {
    var str = "<p><span>I want to put text here then another line under this one</span><span>text here</span></p>";

    (function type(isInTagArg, indexArg) {
        var index = indexArg || 0;
        if (index >= str.length) {
            return;
        }

        var current = str.charAt(index);

        if (isInTagArg) {
            // Still inside a tag: keep skipping; a '>' ends the tag.
            return type(current != '>', index + 1);
        }

        if (current == '<') {
            // A tag starts here: skip it synchronously.
            return type(true, index + 1);
        }

        // Ordinary character: show everything up to and including it, then wait.
        document.getElementById('typewriter').innerHTML = str.substr(0, index + 1);
        setTimeout(function () {
            type(false, index + 1);
        }, 80);
    }());
}
#typewriter {
color: lime;
text-align: center;
}
#typewriter span
{
display: block;
}
<div id="typewriter"></div>

Related

JS switch case in every second character in a string

The function is supposed to take a string and switch the case of every second character. For example:
input: 'HelloWorld' output: 'HElLowoRlD'
input: 'abcdefg' output: 'aBcDeFg'
input: 'TONYmontana' output: 'ToNymOnTaNa'
My function doesn't work, why?
// Question code: meant to flip the case of every character at an odd index of `text`.
function switchCase(text) {
for (let i = 0; i < text.length; i++) {
if (i % 2 !== 0) {
if (text[i] === text[i].toLowerCase()) {
// NOTE: strings are immutable, so assigning to text[i] does not change `text`.
text[i] = text[i].toUpperCase();
} else {
text[i] = text[i].toLowerCase();
}
}
}
// `text` is never reassigned above, so the value returned is the value passed in.
return text;
}
You should store the new value in a string and return that from the function:
/**
 * Builds a copy of `text` in which every character at an odd index has its
 * case flipped; characters at even indexes are copied unchanged.
 * @param {string} text - the input string
 * @returns {string} the newly built string
 */
function secondCase(text) {
  let result = '';
  let position = 0;
  while (position < text.length) {
    const ch = text[position];
    const isEvenIndex = position % 2 === 0;
    if (isEvenIndex) {
      result += ch; // unmodified letter
    } else if (ch === ch.toLowerCase()) {
      result += ch.toUpperCase();
    } else {
      result += ch.toLowerCase();
    }
    position += 1;
  }
  return result;
}
console.log(secondCase('HelloWorld'));
console.log(secondCase('abcdefg'));
console.log(secondCase('TONYmontana'));
Because javascript strings are immutable, so code like text[i] = 'a' doesn't work.
You can convert string to array to do what you want, eg:
/**
 * Returns `c` with its case flipped: upper-cased when `c` equals its own
 * lower-case form, lower-cased otherwise.
 * @param {string} c - a character (or any string)
 * @returns {string}
 */
function toggleCase(c) {
  if (c === c.toLowerCase()) {
    return c.toUpperCase();
  }
  return c.toLowerCase();
}
/**
 * Flips the case of every character at an odd index by splitting the string
 * into an array, mapping it through toggleCase, and joining it again.
 * @param {string} text - the input string
 * @returns {string}
 */
function secondCase(text) {
  const chars = text.split('');
  const flipped = chars.map(function (c, i) {
    return i % 2 === 0 ? c : toggleCase(c);
  });
  return flipped.join('');
}
In js strings are immutable. So u may handle your case like below
/**
 * Returns a copy of `str` in which the character at `index` is replaced by `chr`.
 * @param {string} str - source string
 * @param {number} index - position of the character to replace
 * @param {string} chr - replacement text
 * @returns {string}
 */
function setCharAt(str, index, chr) {
  const before = str.substring(0, index);
  const after = str.substring(index + 1);
  return before + chr + after;
}
/**
 * Flips the case of every character at an odd index. Because strings are
 * immutable, each change produces a new string through setCharAt, which is
 * assigned back to `text`.
 * @param {string} text - the input string
 * @returns {string}
 */
function switchCase(text) {
  for (let i = 1; i < text.length; i += 2) {
    const current = text[i];
    const swapped = current === current.toLowerCase()
      ? current.toUpperCase()
      : current.toLowerCase();
    text = setCharAt(text, i, swapped);
  }
  return text;
}
The string should be written to a new string because strings are immutable. Alternative approach: map and join an array representation of the word:
/**
 * Upper-cases every character at an odd position of `word` and lower-cases
 * every character at an even position.
 * @param {string} word - the input string
 * @returns {string}
 */
const ucEverySecondChr = word => {
  const out = [];
  let position = 0;
  for (const c of word) {
    out.push(position % 2 !== 0 ? c.toUpperCase() : c.toLowerCase());
    position += 1;
  }
  return out.join('');
};
console.log(ucEverySecondChr("helloworld"));
console.log(ucEverySecondChr("abcdefg"))
console.log(ucEverySecondChr("TONYmontana"));
[after comment] Immutability and function scope
// Appends the given strings, each followed by a newline, as HTML at the end of #result.
let log = (...strs) => {
  const target = document.querySelector(`#result`);
  const markup = strs.join(`\n`) + `\n`;
  return target.insertAdjacentHTML(`beforeend`, markup);
};
log(`<h3>About immutability of strings</h3>`);
let str = `lt4`;
str[2] = `X`;
str.toUpperCase();
str.concat(`You won't see me here`);
log(`<code>let str = \`lt4\`;`,
`<span class="comment">Whatever you do to me, I (str) `+
`won't budge ...</span>`,
`str[2] = \`X\`;`,
`str.toUpperCase();`,
`str.concat(\`You won't see me here\`);</code>`,
`[str] is immutable, so it is not changed. It's value is <b>${str}</b>`);
log(``,`---`, `[str] passed to the function exists ` +
`only within the called function scope. ` +
`The original value does not change, so the ` +
`<i>return value</i> of <code class="inline">changeStr(str);</code> is <b>${
changeStr(str)}</b>, but the <i>value of [str]</i> is still => <b>${
str}</b>`);
str = changeStr(str);
log(``,`---`,
`<code>str = changeStr(str);`,
`<span class="comment">Hey, I (str) suddenly feel different</span></code>`,
`Now you changed the original [str] by (re)assigning the return ` +
`value of <code class="inline">changeStr(str)</code>. So ` +
`the value of [str] is now <b>${str}</b>`);
let strObj = new String(`Hello`);
strObj += ` world`;
log(``, `---`,
`<code>let strObj = new String(\`Hello\`);`,
`<span class="comment">I (strObj) am incomplete, change me!</span>`,
`strObj += \` world\`;</code>`,
`To be complete: ` +
`if your string variable is a <i>real instance</i> of `+
`<code class="inline">String</code> ` +
`you can change it without (re)assigning, `+
`so the value of [strObj] is now <b>${strObj}</b>`);
/**
 * Returns `str` with " => there we are!" appended when it is shorter than
 * four characters; otherwise returns it unchanged.
 * @param {string} str
 * @returns {string}
 */
function changeStr(str) {
  return str.length < 4 ? str + " => there we are!" : str;
}
body {
font: normal 12px/15px verdana, arial;
margin: 2em;
}
#result {
  /* Property name was misspelled ("font-familiy") and the value was not a
     known family; `monospace` is the generic monospace keyword. */
  font-family: monospace;
  white-space: pre-wrap;
  max-width: 75vw;
}
code {
color: green;
font-family: 'Courier New';
background-color: #EEE;
display: block;
padding: 2px;
max-width: 70vw;
}
.comment {
color: #777;
}
.comment:before {
content: '// ';
}
code.inline {
display: inline-block;
}
<div id="result"></div>

Format color while typing in textarea or pre

I'm trying to create a comments section that lets users #someone. When the user types #random and then a space, I want it to be highlighted. So I've created something that searches and replaces the string, but when the HTML is replaced, the cursor is placed at the beginning. Is there any way to solve this? Is there any other way of doing something like this?
// Question code: on every keyup in #textarea, wrap each space-separated word that
// starts with '#' in <mark> and write the result back into the element as HTML.
$('#textarea').keyup(function() {
// NOTE: txt and new_txt are assigned here without a declaration.
txt = this.innerText.split(" ")
new_txt = this.innerText
// Remove every word that does not start with '#'.
for (var i = txt.length - 1; i >= 0; i--) {
if (txt[i].startsWith('#') == false) {
delete txt[i]
}
}
// Sort, then join/trim/split again (presumably to get rid of the slots deleted above).
txt = txt.sort().join(" ").trim().split(" ")
console.log(txt)
if (txt.length > 0 && txt[0] != "") {
for (var i = 0; i < txt.length; i++) {
// replace() with a string pattern only replaces the first occurrence.
new_txt = new_txt.replace(txt[i], '<mark>' + txt[i] + '</mark>')
}
$('#my_console_log').text(new_txt)
// Replaces the whole content of the editable element (the question reports that the cursor then jumps to the beginning).
this.innerHTML = new_txt
}
});
pre {
border: solid black 1px;
}
mark {
background: blue;
color: red;
}
<script src="https://code.jquery.com/jquery-1.10.2.js"></script>
<title>Test page</title>
<form>
<pre id='textarea' contentEditable='true'></pre>
<div id="my_console_log"></div>
</form>
Here is a simple plugin available which can be useful to you,
Download the plugin and edit the file jquery.hashtags.js and remove the condition for #. You can also change the style as per your requirement.
// jQuery plugin `hashtags`: wraps the target element in highlighter markup and, on every
// keyup, copies its value into the ".highlighter" element with hashtags wrapped in
// <span class="hashtag">.
(function($) {
$.fn.hashtags = function() {
// Build the wrapper structure around the element: .jqueryHashtags > .highlighter + .typehead > element.
$(this).wrap('<div class="jqueryHashtags"><div class="highlighter"></div></div>').unwrap().before('<div class="highlighter"></div>').wrap('<div class="typehead"></div></div>');
$(this).addClass("theSelector");
// `autosize` is not defined in this snippet; it must be provided elsewhere.
autosize($(this));
$(this).on("keyup", function() {
var str = $(this).val();
// Keep the highlighter as wide as the input element.
$(this).parent().parent().find(".highlighter").css("width",$(this).css("width"));
str = str.replace(/\n/g, '<br>');
// Only highlight when the text contains no URL with a "#fragment" (the ASCII and the Arabic-range pattern are each tested twice).
if(!str.match(/(http|ftp|https):\/\/[\w-]+(\.[\w-]+)+([\w.,#?^=%&:\/~+#-]*[\w#?^=%&\/~+#-])?#([a-zA-Z0-9]+)/g) && !str.match(/(http|ftp|https):\/\/[\w-]+(\.[\w-]+)+([\w.,#?^=%&:\/~+#-]*[\w#?^=%&\/~+#-])?#([a-zA-Z0-9]+)/g) && !str.match(/(http|ftp|https):\/\/[\w-]+(\.[\w-]+)+([\w.,#?^=%&:\/~+#-]*[\w#?^=%&\/~+#-])?#([\u0600-\u06FF]+)/g) && !str.match(/(http|ftp|https):\/\/[\w-]+(\.[\w-]+)+([\w.,#?^=%&:\/~+#-]*[\w#?^=%&\/~+#-])?#([\u0600-\u06FF]+)/g)) {
// Remove below condition for hashtag.
// If no "#tag#" sequence exists, wrap every "#tag"; otherwise wrap only the first tag of each "#tag#tag" pair.
if(!str.match(/#(([_a-zA-Z0-9]+)|([\u0600-\u06FF]+)|([ㄱ-ㅎㅏ-ㅣ가-힣]+)|([ぁ-んァ-ン]+)|([一-龯]+))#/g)) { //arabic support, CJK support
str = str.replace(/#(([_a-zA-Z0-9]+)|([\u0600-\u06FF]+)|([ㄱ-ㅎㅏ-ㅣ가-힣]+)|([ぁ-んァ-ン]+)|([一-龯]+))/g,'<span class="hashtag">#$1</span>');
}else{
str = str.replace(/#(([_a-zA-Z0-9]+)|([\u0600-\u06FF]+)|([ㄱ-ㅎㅏ-ㅣ가-힣]+)|([ぁ-んァ-ン]+)|([一-龯]+))#(([_a-zA-Z0-9]+)|([\u0600-\u06FF]+)|([ㄱ-ㅎㅏ-ㅣ가-힣]+)|([ぁ-んァ-ン]+)|([一-龯]+))/g,'<span class="hashtag">#$1</span>');
}
// Keep this condition.
// Same two replacements as above, but the ASCII alternative does not allow "_".
if(!str.match(/#(([a-zA-Z0-9]+)|([\u0600-\u06FF]+)|([ㄱ-ㅎㅏ-ㅣ가-힣]+)|([ぁ-んァ-ン]+)|([一-龯]+))#/g)) {
str = str.replace(/#(([a-zA-Z0-9]+)|([\u0600-\u06FF]+)|([ㄱ-ㅎㅏ-ㅣ가-힣]+)|([ぁ-んァ-ン]+)|([一-龯]+))/g,'<span class="hashtag">#$1</span>');
}else{
str = str.replace(/#(([a-zA-Z0-9]+)|([\u0600-\u06FF]+)|([ㄱ-ㅎㅏ-ㅣ가-힣]+)|([ぁ-んァ-ン]+)|([一-龯]+))#(([a-zA-Z0-9]+)|([\u0600-\u06FF]+)|([ㄱ-ㅎㅏ-ㅣ가-힣]+)|([ぁ-んァ-ン]+)|([一-龯]+))/g,'<span class="hashtag">#$1</span>');
}
}
// Show the (possibly highlighted) text in the highlighter layer.
$(this).parent().parent().find(".highlighter").html(str);
});
// Clicking the element before the input's parent (the highlighter) moves focus to the input.
$(this).parent().prev().on('click', function() {
$(this).parent().find(".theSelector").focus();
});
};
})(jQuery);
Instead of replacing the html just append a class with the color that you want

Write/delete text by character

I try to make change of any text by char by char (show text by char, delete text by char and show another one char by char).
What I actually have?
// Question code: every 2500 ms, start an inner interval that removes the last character of #el's HTML.
var i = 0;
var terms = ['text <b>bold</b>', 'longer text <b>bold</b>', '<b>bold</b> text 3'];
var timer = setInterval(function() {
var el = $('#el');
var wr = $('#wr');
// NOTE(review): this inner interval is never cleared in this snippet.
setInterval(function() {
var str = el.html(); // doesn't work (still shows all content, not sliced one)
el.html(str.substring(0, str.length - 1));
// NOTE(review): `str` is declared only inside the callback above, so it is not defined where the delay below is computed.
}, 300 / str.length); // (300 / str.length) - do all animation in 300s
// Advance the term index, wrapping around after the third term.
i++;
if (i === 3) {
i = 0;
}
}, 2500);
I have problem with slicing last char, so I don't get to adding new text so far :-(
One of variants I tried:
...
var text = terms[i].split('');
setInterval(function() {
el.html(text); // add sliced text in loop... not working as expected
// ...
text = text.slice(0, -1); // slice text by last character
}, 300 / text.length);
Okay, due to the comments a little bit explanation
I have an element
<span id=el>text <b>bold</b></span>
In 300ms interval I need to remove this text char by char.
<span id=el>text <b>bold</b></span>
<span id=el>text <b>bol</b></span>
<span id=el>text <b>bo</b></span>
<span id=el>text <b>b</b></span>
<span id=el>text <b></b></span> // remove 'b'
<span id=el>text</span> // remove ' ' and empty bold
<span id=el>tex</span>
<span id=el>te</span>
<span id=el>t</span>
<span id=el></span>
// now element is empty, since start it's 300ms
// and now I need to put there new text, char by char (whole phrase 300ms again)
<span id=el>l</span>
<span id=el>lo</span>
<span id=el>lon</span>
...
<span id=el>longer tex</span>
<span id=el>longer text</span>
<span id=el>longer text </span> // add space
<span id=el>longer text <b>b</b></span> // add 'b' into bold
<span id=el>longer text <b>bo</b></span>
<span id=el>longer text <b>bol</b></span>
<span id=el>longer text <b>bold</b></span>
// after 2500ms remove this char by char again and replace by third. Etc.
Etc. Can you help me with that, please? I have tried this for the last 2 days with many attempts and no result...
Thanks
This is how I would organize my code to shrink and grow an element. The only sensible way I can do this is to first replace < and > by the corresponding entity codes &lt; and &gt; so that these characters are not interpreted as actual tags. These 4-letter entity codes will be removed and added as a single unit. In this way you can shrink the string one quasi-character at a time from right to left and still have valid HTML at all times.
The Promise api (well, acually jQuery's $.Deferred version of this) is used to be able to know in a deterministic fashion when the shrink-grow cycle, which is an asynchronous process, has completed to then start the 2500 ms delay (which is another asynchronous process) before beginning anew.
$(function() {
/**
 * Shows `term` in #el with its angle brackets escaped (so tags are displayed
 * as text), removes it from right to left one visible character per 30 ms
 * tick, then types it back in from left to right, and finally calls `resolve`.
 *
 * The 4-character entity codes "&lt;" and "&gt;" are always removed and added
 * as a single unit, so the element never contains half an entity.
 *
 * @param {Function} resolve - called with `undefined` once the term is fully shown again
 * @param {string} term - text to animate; '<' and '>' are escaped before display
 */
function shrink_grow(resolve, term)
{
    // Escape '<' and '>' as entity codes; without this the replace is a no-op
    // and the tags in `term` would be interpreted as real markup.
    term = term.replace(/</g, '&lt;').replace(/>/g, '&gt;');
    let el = $('#el');
    el.html(term);
    let interval = setInterval(shrinker, 30);

    // Removes one visible character (a whole entity code counts as one).
    function shrinker()
    {
        let str = el.html();
        let n = str.length >= 4 && (str.endsWith('&gt;') || str.endsWith('&lt;')) ? 4 : 1;
        el.html(str.substr(0, str.length - n));
        // `str` is the content before this tick's removal, so the switch to
        // growing happens one tick after the element became empty.
        if (str.length === 0) {
            clearInterval(interval);
            interval = setInterval(grower, 30);
        }
    }

    // Adds one visible character (a whole entity code counts as one).
    function grower()
    {
        let str = el.html();
        if (str.length === term.length) {
            clearInterval(interval);
            resolve(undefined); // we are done
            return;
        }
        // Look at the characters that come next, starting at str.length.
        let next = term.substr(str.length, 4);
        let step = (next === '&lt;' || next === '&gt;') ? 4 : 1;
        el.html(term.substr(0, str.length + step));
    }
}
/**
 * Returns a promise that is resolved after the given delay.
 * @param {number} milliseconds - delay before the promise resolves
 * @returns the promise of a jQuery Deferred resolved by a timer
 */
function pause(milliseconds) {
    const gate = $.Deferred();
    // The timer resolves the Deferred once the delay has elapsed.
    setTimeout(gate.resolve, milliseconds);
    return gate.promise();
}
// Animate the three terms one after another, pausing 2500 ms between them.
let terms = ['text <b>bold</b>', 'longer text <i>italic</i> text', '<b>bold</b> text 3'];
let term_number = 0;

// Starts the shrink/grow animation for the next term; the returned promise
// is resolved when shrink_grow calls the resolve callback it was given.
function animate_next_term() {
    let cycle = $.Deferred();
    let finished = cycle.promise();
    shrink_grow(cycle.resolve, terms[term_number++]);
    return finished;
}

// Delay between two terms.
function wait_between_terms() {
    return pause(2500);
}

animate_next_term()
    .then(wait_between_terms)
    .then(animate_next_term)
    .then(wait_between_terms)
    .then(animate_next_term)
    .then(function() {
        console.log('done');
    });
});
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<span id="el"></span>
And Keeping the tags intact
This is very complicated, however:
$(function() {
let TOTAL_TIME = 300;
// Types `term` into #el character by character, then removes it again character by
// character, keeping an inner tag pair such as <b>…</b> intact while its text changes.
// Calls `resolve` once the element has been emptied again.
// Note: in the tag patterns below the `+` is outside the capture group, so \1 holds a single
// character; the tags in `terms` (<b>, <i>) have one-letter names.
function shrink_grow(resolve, term)
{
let el = $('#el');
let matches = term.match(/<([^>])+>(.*?)<\/\1>/); // look for internal tag
let internalTagTextLength = matches ? matches[2].length : 0;
let internalTagText = internalTagTextLength ? matches[2] : '';
// Number of visible characters: total length minus the markup of the tag pair
// ("<", ">", "</", ">" = 5 characters, plus the captured tag name twice).
let strlen = term.length;
if (matches) {
strlen -= matches[1].length * 2 + 5;
}
// Spread TOTAL_TIME over the visible characters, but never tick faster than every 16 ms.
let shrinkGrowInterval = TOTAL_TIME / strlen;
if (shrinkGrowInterval < 16) {
shrinkGrowInterval = 16;
}
// Growing runs first; grower() switches the interval over to shrinker().
let interval = setInterval(grower, shrinkGrowInterval);
// One shrink step: removes the last visible character of #el.
function shrinker()
{
let str = el.html();
let matches = str.match(/<([^>])+>(.*?)<\/\1>$/); // <i>text</i> at end of string, for example
if (matches) {
let str2 = matches[2];
if (str2.length < 2) { // get rid of entire tag
str2 = matches[0];
let n = str2.length;
let l = str.length - n;
el.html(str.substr(0, l));
if (l === 0) {
clearInterval(interval);
resolve(undefined); // we are done
}
}
else {
// Drop the last character of the tag's text and rebuild the tag pair around it.
let str2a = str2.substr(0, str2.length - 1);
str = str.replace(/<([^>])+>(.*?)<\/\1>$/, '<' + matches[1] + '>' + str2a + '</' + matches[1] + '>');
el.html(str);
}
}
else {
el.html(str.substr(0, str.length - 1));
// `str` is the content before this step's removal.
if (str.length === 0) {
clearInterval(interval);
resolve(undefined); // we are done
}
}
}
// One grow step: adds the next visible character of `term` to #el.
function grower()
{
let str = el.html();
if (str.length == term.length) {
// Whole term is shown: switch over to shrinking.
clearInterval(interval);
interval = setInterval(shrinker, shrinkGrowInterval);
}
else {
let matches = term.substr(str.length).match(/^<([^>])+>(.*?)<\/\1>/); // start of <i>text</i>, for example?
if (matches) {
// The tag pair starts here: add it with only the first character of its text.
let str2 = '<' + matches[1] + '>' + matches[2].substr(0, 1) + '</' + matches[1] + '>';
el.html(str + str2);
}
else {
let matches = str.match(/<([^>])+>(.*?)<\/\1>$/); // <i>text</i> at end of string, for example
if (matches) {
let str2 = matches[2];
let l = str2.length;
if (l == internalTagTextLength) {
// The tag's text is complete: continue with the characters that follow the tag.
el.html(term.substr(0, str.length + 1));
}
else {
// Add one more character of the tag's text inside the tag pair.
let str2a = internalTagText.substr(0, l + 1);
str = str.replace(/<([^>])+>(.*?)<\/\1>$/, '<' + matches[1] + '>' + str2a + '</' + matches[1] + '>');
el.html(str);
}
}
else {
el.html(term.substr(0, str.length + 1));
}
}
}
}
}
let terms = ['text <b>bold</b>', 'longer text <i>italic</i> text', '<b>bold</b> text 3'];
let nTerms = terms.length;
let termNumber = -1;
/**
 * Advances to the next term (wrapping around to the first one after the
 * last), starts its animation, and calls itself again once that animation
 * reports completion.
 */
function callShrinkGrow() {
    termNumber += 1;
    if (termNumber >= nTerms) {
        termNumber = 0;
    }
    const cycle = $.Deferred();
    const finished = cycle.promise();
    shrink_grow(cycle.resolve, terms[termNumber]);
    finished.then(callShrinkGrow);
}
callShrinkGrow();
});
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<body>
<span id="el"></span>
Okay according to my understanding, you want to show string/text char by char. So I think this will help you.
// Shows "Hello World" in #charP one character at a time, one step every 100 ms.
var str = "Hello World";
var c = "";
var i = 0;
(function loop() {
    i += 1;
    // Stop once every character has been consumed.
    if (i > str.length) {
        return;
    }
    // Extend the text now; it is displayed when the timer fires.
    c += str[i - 1];
    setTimeout(function () {
        $("#charP").text(c);
        loop();
    }, 100);
})();
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<div>
<p id="charP"></p>
</div>

Linebreak in textarea

I need to add line break when the text overflows
ex. if the text is
wwwwwwwwwwwwwww
wwwwwwwwwwwwwww
which is with in the textarea
the data should be with the line break.
Currently the data it is displaying is
wwwwwwwwwwwwwwwwwwwwwwwwwwwwww.
I need to show the exact way how the data is entered in textarea.
When the text overflows it moves to next line in the textarea,but when the data is retrieved the line break is not retained. It just displays as a single line
Or is there any way we can know that overflow occurs so that new line can be added?
I got the answer from the below fiddle which applies the line break to each next line
http://jsfiddle.net/pH79a/218/
html
<div>
<textarea rows="5" id="myTextarea" ></textarea>
</div>
<div id="pnlPreview"></div>
<div>
<button type="button" onclick="ApplyLineBreaks('myTextarea');">Apply Line Breaks</button>
</div>
javascript
// Inserts "\n" into the value of the textarea with id `strTextAreaId` at the positions where
// its text is measured to overflow the textarea's width, then shows the result in
// #pnlPreview with every "\n" replaced by "<br />".
function ApplyLineBreaks(strTextAreaId) {
var oTextarea = document.getElementById(strTextAreaId);
// Switch wrapping off for measuring; when the `wrap` property is falsy the textarea is
// additionally replaced by a clone carrying the same value.
if (oTextarea.wrap) {
oTextarea.setAttribute("wrap", "off");
}
else {
oTextarea.setAttribute("wrap", "off");
var newArea = oTextarea.cloneNode(true);
newArea.value = oTextarea.value;
oTextarea.parentNode.replaceChild(newArea, oTextarea);
oTextarea = newArea;
}
var strRawValue = oTextarea.value;
// Measure the scroll width of the empty textarea as the reference width.
oTextarea.value = "";
var nEmptyWidth = oTextarea.scrollWidth;
var nLastWrappingIndex = -1;
// Puts strTest into the textarea and reports whether it is wider than the empty textarea.
function testBreak(strTest) {
oTextarea.value = strTest;
return oTextarea.scrollWidth > nEmptyWidth;
}
// Recursively searches for the prefix length of strSource at which the text becomes too wide:
// starts at 64, doubles while no upper bound is known (nRight == -1), then bisects between
// nLeft and nRight. Returns null when the whole string fits.
function findNextBreakLength(strSource, nLeft, nRight) {
var nCurrent;
if(typeof(nLeft) == 'undefined') {
nLeft = 0;
nRight = -1;
nCurrent = 64;
}
else {
if (nRight == -1)
nCurrent = nLeft * 2;
else if (nRight - nLeft <= 1)
return Math.max(2, nRight);
else
nCurrent = nLeft + (nRight - nLeft) / 2;
}
var strTest = strSource.substr(0, nCurrent);
var bLonger = testBreak(strTest);
if(bLonger)
nRight = nCurrent;
else
{
if(nCurrent >= strSource.length)
return null;
nLeft = nCurrent;
}
return findNextBreakLength(strSource, nLeft, nRight);
}
var i = 0, j;
var strNewValue = "";
// Consume the raw value line by line.
while (i < strRawValue.length) {
var breakOffset = findNextBreakLength(strRawValue.substr(i));
if (breakOffset === null) {
// The remainder fits on one line.
strNewValue += strRawValue.substr(i);
break;
}
nLastWrappingIndex = -1;
var nLineLength = breakOffset - 1;
// Prefer to break right after the last space, '-' or '+' within the line.
for (j = nLineLength - 1; j >= 0; j--) {
var curChar = strRawValue.charAt(i + j);
if (curChar == ' ' || curChar == '-' || curChar == '+') {
nLineLength = j + 1;
break;
}
}
strNewValue += strRawValue.substr(i, nLineLength) + "\n";
i += nLineLength;
}
oTextarea.value = strNewValue;
oTextarea.setAttribute("wrap", "");
document.getElementById("pnlPreview").innerHTML = oTextarea.value.replace(new RegExp("\\n", "g"), "<br />");
}
word-wrap: break-word is your friend. Try this code.
textarea {
word-wrap: break-word;
}
Try cols attribute of the textarea
<textarea rows="4" cols="40">
wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww
</textarea>
Do this
<input type="text" style="overflow-wrap: break-word;">
In PHP u usually use nl2br() function.
Please refer to the below question, I am sure that it will help you!
jQuery convert line breaks to br (nl2br equivalent)
Use word-wrap for textarea reference to below link: https://www.w3schools.com/cssref/tryit.asp?filename=trycss3_word-wrap
This simple line of code can help you with the task:
<textarea id="textbox" rows="10" cols="30"></textarea>
But you should search it on web and there are many questions with the same context on the stackoverflow itself.
You can try it here but I think it is not needed:
https://jsfiddle.net/thisisdg/8f3y5r4d/
I hope this helps.

Wrapping Sentences within <p> Tags with <span>'s, But Keep Other Tags

To give you an idea of what I need, I have been using the below code to parse content within tags and wrap each sentence within tags so I can then interact with sentences on a page.
// Question code: wraps every sentence of each <p> in <span class="sentence">.
$('p').each(function() {
var sentences = $(this)
// .text() returns the text only, so nested tags are lost when the result is written back below.
.text()
// A sentence ends at '.', '!' or '?' (optionally followed by a quote) followed by whitespace or the end of the text.
.replace(/(((?![.!?]['"]?\s).)*[.!?]['"]?)(\s|$)/g,
'<span class="sentence">$1</span>$3');
$(this).html(sentences);
});
However, the following line demonstrates my problem:
<p>This is a link and it is removed with the above code! Here is another sentence.</p>
Nested tags such as <a>, <img> etc...within <p> tags that I'm searching through are removed with the code that I'm using. I need to keep these tags intact, so the content stays the same within the <p> tags.
I need:
<p><span class="sentence">This is a link and it is removed with the above code!</sentence><sentence>Here is another sentence.</sentence></p>
After reading this barn-burner about parsing HTML with regex, I've concluded that I need to use a combo of an HTML parser of some sort to traverse through sub-tags within a <p> tag, and then use a regex to find the sentences. I think the regex I have listed above should work for most of my uses, if that helps.
So: how should I do it?
It is really difficult to tokenise language reliably into sentences, and that is without the added complexity of throwing HTML into the equation. There are some applications out there that attempt to deal with Natural Language Processing; an example would be the Stanford Tokenizer, which runs on Java (not JavaScript).
And as people keep mentioning, a regex is not the solution to this problem, language is not regular so don't expect a Regular Expression only solution.
There is a question here on SO, Basic NLP in CoffeeScript or JavaScript — Punkt tokenizaton, simple trained Bayes models — where to start? Which I think summarises things fairly simply for Javascript.
Anyway, to at least give you a little something that you could play with, I knocked up a little code for you. This works reasonably well until the markup/language begins to resemble anything slightly complex or different, but ultimately it misses the mark by a long way. It may be enough for what you need, though; I don't know.
CSS
.emphasis {
font-style: italic;
}
.bold {
font-weight: bold;
}
.emphasis.bold {
font-style: italic;
font-weight: bold;
}
.unidentified {
background-color: pink;
}
.sentence0 {
background-color: yellow;
}
.sentence1 {
background-color: green;
}
.sentence2 {
background-color: red;
}
.whitespace {
white-space: pre;
background-color: blue;
}
Javascript
/*jslint maxerr: 50, indent: 4, browser: true */
(function () {
"use strict";
var rxOpen = new RegExp("<[^\\/].+?>"),
rxClose = new RegExp("<\\/.+?>"),
rxWhitespace = new RegExp("^\\s+?"),
rxSupStart = new RegExp("^<sup\\b[^>]*>"),
rxSupEnd = new RegExp("<\/sup>"),
sentenceEnd = [],
color = 0,
rxIndex;
sentenceEnd.push(new RegExp("[^\\d][\\.!\\?]+"));
sentenceEnd.push(new RegExp("(?=([^\\\"]*\\\"[^\\\"]*\\\")*[^\\\"]*?$)"));
sentenceEnd.push(new RegExp("(?![^\\(]*?\\))"));
sentenceEnd.push(new RegExp("(?![^\\[]*?\\])"));
sentenceEnd.push(new RegExp("(?![^\\{]*?\\})"));
sentenceEnd.push(new RegExp("(?![^\\|]*?\\|)"));
//sentenceEnd.push(new RegExp("(?![^\\\\]*?\\\\)"));
//sentenceEnd.push(new RegExp("(?![^\\/.]*\\/)")); // all could be a problem, but this one is problematic
rxIndex = new RegExp(sentenceEnd.reduce(function (previousValue, currentValue) {
return previousValue + currentValue.source;
}, ""));
/**
 * Finds the first match of rxIndex in `html`.
 * @param {string} html
 * @returns {number} index of the last character of that match, or -1 when there is no match
 */
function indexSentenceEnd(html) {
    var found = html.match(rxIndex);
    if (found === null) {
        return -1;
    }
    return found.index + found[0].length - 1;
}
/**
 * Pushes `string` wrapped in a <span> onto `array`.
 * For className "sentence" the class becomes "sentence0" or "sentence1"
 * (alternating with the shared `color` counter, which is incremented),
 * optionally followed by a space and `classNameOpt`.
 */
function pushSpan(array, className, string, classNameOpt) {
    var cssClass = className;
    if (className === "sentence") {
        cssClass = className + (color % 2) + (classNameOpt ? " " + classNameOpt : "");
        color += 1;
    }
    array.push('<span class="' + cssClass + '">' + string + '</span>');
}
/**
 * If `html` starts with a complete <sup>…</sup> element, moves that element
 * inside the closing </span> of the previously pushed entry of `array` and
 * returns the remaining html. In every other case `html` is returned
 * unchanged and `array` is left untouched.
 *
 * Two cases are handled explicitly: an unclosed <sup> (slicing from a
 * "not found" index of -1 would discard everything but the last character)
 * and an empty `array` (there is no previous entry to attach to).
 *
 * @param {string} html - remaining markup of the paragraph
 * @param {string[]} array - spans produced so far
 * @returns {string} the markup that is still to be processed
 */
function addSupToPrevious(html, array) {
    if (array.length === 0 || html.search(rxSupStart) === -1) {
        return html;
    }
    var closeAt = html.search(rxSupEnd);
    if (closeAt === -1) {
        return html;
    }
    var end = closeAt + 6; // 6 === "</sup>".length
    var last = array.pop();
    // 7 === "</span>".length: insert the <sup> element just before the closing tag.
    array.push(last.slice(0, -7) + html.slice(0, end) + last.slice(-7));
    return html.slice(end);
}
/**
 * If rxWhitespace matches `html`, pushes the matched text as a "whitespace"
 * span and returns the html that follows it; otherwise returns `html` as is.
 */
function leadingWhitespaces(html, array) {
    var found = html.match(rxWhitespace);
    if (found === null) {
        return html.slice(0);
    }
    var count = found[0].length;
    pushSpan(array, "whitespace", html.slice(0, count));
    return html.slice(count);
}
/**
 * Pushes the whole of `html` as one "sentence" span and returns "" when no
 * sentence end is found (or the reported index equals html.length);
 * otherwise returns `html` unchanged.
 */
function paragraphIsSentence(html, array) {
    var index = indexSentenceEnd(html);
    var isWholeParagraph = index === -1 || index === html.length;
    if (!isWholeParagraph) {
        return html;
    }
    pushSpan(array, "sentence", html, "paragraphIsSentence");
    return "";
}
/**
 * When `html` contains no opening tag (rxOpen), pushes the text up to and
 * including the first sentence end (or all of it when there is none) as a
 * "sentence" span and returns the rest; otherwise returns `html` unchanged.
 */
function paragraphNoMarkup(html, array) {
    if (html.search(rxOpen) !== -1) {
        return html.slice(0);
    }
    var end = indexSentenceEnd(html);
    if (end === -1) {
        end = html.length;
    }
    end += 1; // include the character at the sentence end
    pushSpan(array, "sentence", html.slice(0, end), "paragraphNoMarkup");
    return html.slice(end);
}
/**
 * When `html` contains an opening tag and the first sentence end lies before
 * that tag's position or after the position of the first closing tag, pushes
 * the text up to and including the sentence end as a "sentence" span and
 * returns the rest; otherwise returns `html` unchanged.
 */
function sentenceUncontained(html, array) {
    var open = html.search(rxOpen);
    if (open === -1) {
        return html.slice(0);
    }
    var end = indexSentenceEnd(html);
    if (end === -1) {
        end = html.length;
    }
    var close = html.search(rxClose);
    var isBetweenTags = end >= open && end <= close;
    if (isBetweenTags) {
        return html.slice(0);
    }
    end += 1; // include the character at the sentence end
    pushSpan(array, "sentence", html.slice(0, end), "sentenceUncontained");
    return html.slice(end);
}
/**
 * When `html` contains an opening tag and the first sentence end lies
 * strictly between that tag's position and the position of the first closing
 * tag, pushes everything up to and including that closing tag as a
 * "sentence" span and returns the rest; otherwise returns `html` unchanged.
 */
function sentenceContained(html, array) {
    var open = html.search(rxOpen);
    if (open === -1) {
        return html.slice(0);
    }
    var end = indexSentenceEnd(html);
    if (end === -1) {
        end = html.length;
    }
    var close = html.search(rxClose);
    if (!(end > open && end < close)) {
        return html.slice(0);
    }
    // Cut right after the closing tag.
    var cut = close + html.match(rxClose)[0].length;
    pushSpan(array, "sentence", html.slice(0, cut), "sentenceContained");
    return html.slice(cut);
}
// Fallback stage: pushes all remaining html as one span with class "sentence2" and returns "".
// Because the class name passed is not exactly "sentence", pushSpan ignores the "anythingElse" argument.
function anythingElse(html, array) {
pushSpan(array, "sentence2", html, "anythingElse");
return "";
}
/**
 * Rewrites the innerHTML of every <p> in the document as a sequence of spans.
 *
 * For each paragraph the remaining html is fed through the stages below, in
 * order; the first stage that changes the length of the html ends the pass
 * and the next pass starts again at the first stage. Processing stops when
 * no html is left or after 100 passes.
 */
function guessSenetences() {
    var paragraphs = document.getElementsByTagName("p");

    Array.prototype.forEach.call(paragraphs, function (paragraph) {
        var stages = [
                addSupToPrevious,
                leadingWhitespaces,
                paragraphIsSentence,
                paragraphNoMarkup,
                sentenceUncontained,
                sentenceContained,
                anythingElse
            ],
            html = paragraph.innerHTML,
            length = html.length,
            array = [],
            safety = 100,
            stage;

        while (length && safety) {
            for (stage = 0; stage < stages.length; stage += 1) {
                html = stages[stage](html, array);
                if (html.length !== length) {
                    break;
                }
            }

            length = html.length;
            safety -= 1;
        }

        paragraph.innerHTML = array.join("");
    });
}
guessSenetences();
}());
On jsfiddle
you need to use .html() instead of .text() if you want to keep tags intact.
Check below code and let me know if it doesn't work out.
DEMO
// Wraps every sentence of each <p> in <span class="sentence">, working on the
// paragraph's HTML (not its text) so that nested tags are kept.
$('p').each(function () {
    var paragraph = $(this);
    // A sentence ends at '.', '!' or '?' (optionally followed by a quote)
    // followed by whitespace or the end of the content.
    var sentencePattern = /(((?![.!?]['"]?\s).)*[.!?]['"]?)(\s|$)/g;
    var wrapped = paragraph.html().replace(sentencePattern, '<span class="sentence">$1</span>$3');
    paragraph.html(wrapped);
});

Categories