Paste MS Word as Text but keep some styling Jquery

Paste MS Word as Text but keep some styling Jquery - javascript

My customer wants a UI that he will copy some text from MS Word and paste. After he pastes, my script should delete all the styling and tags of Word, but keep paragraphs (html p tags) and bold styling (html b tag).
I've prepared a UI with html5 contenteditable, there are many editable sections with different kind of content (headers, 1 and 2 column paragraphs etc.)
I've found a script that working well, doing exact the same thing I want. But if I want to use different selector (the script uses #content selector), it does not work. I want to use a class, since many sections with .editable_div class can be added to UI.
Maybe it's because I'm not so good at JS and Jquery. The script is below, how can I make it work with many sections with a class name on the page?
Thanks so much...
Index.html:
<!DOCTYPE html>
<html>
<body>
<div id="src">Source here...</div>
<div id="editor" contenteditable="true">
<p>Place MS-Word text here...</p>
</div>
</body>
</html>
and script:
(function($) {
$.fn.msword_html_filter = function(options) {
var settings = $.extend( {}, options);
function word_filter(editor){
var content = editor.html();
// Word comments like conditional comments etc
content = content.replace(/<!--[\s\S]+?-->/gi, '');
// Remove comments, scripts (e.g., msoShowComment), XML tag, VML content,
// MS Office namespaced tags, and a few other tags
content = content.replace(/<(!|script[^>]*>.*?<\/script(?=[>\s])|\/?(\?xml(:\w+)?|img|meta|link|style|\w:\w+)(?=[\s\/>]))[^>]*>/gi, '');
// Convert <s> into <strike> for line-though
content = content.replace(/<(\/?)s>/gi, "<$1strike>");
// Replace nbsp entites to char since it's easier to handle
//content = content.replace(/ /gi, "\u00a0");
content = content.replace(/ /gi, ' ');
// Convert <span style="mso-spacerun:yes">___</span> to string of alternating
// breaking/non-breaking spaces of same length
content = content.replace(/<span\s+style\s*=\s*"\s*mso-spacerun\s*:\s*yes\s*;?\s*"\s*>([\s\u00a0]*)<\/span>/gi, function(str, spaces) {
return (spaces.length > 0) ? spaces.replace(/./, " ").slice(Math.floor(spaces.length/2)).split("").join("\u00a0") : '';
});
editor.html(content);
// Parse out list indent level for lists
$('p', editor).each(function(){
var str = $(this).attr('style');
var matches = /mso-list:\w+ \w+([0-9]+)/.exec(str);
if (matches) {
$(this).data('_listLevel', parseInt(matches[1], 10));
}
});
// Parse Lists
var last_level=0;
var pnt = null;
$('p', editor).each(function(){
var cur_level = $(this).data('_listLevel');
if(cur_level != undefined){
var txt = $(this).text();
var list_tag = '<ul></ul>';
if (/^\s*\w+\./.test(txt)) {
var matches = /([0-9])\./.exec(txt);
if (matches) {
var start = parseInt(matches[1], 10);
list_tag = start>1 ? '<ol start="' + start + '"></ol>' : '<ol></ol>';
}else{
list_tag = '<ol></ol>';
}
}
if(cur_level>last_level){
if(last_level==0){
$(this).before(list_tag);
pnt = $(this).prev();
}else{
pnt = $(list_tag).appendTo(pnt);
}
}
if(cur_level<last_level){
for(var i=0; i<last_level-cur_level; i++){
pnt = pnt.parent();
}
}
$('span:first', this).remove();
pnt.append('<li>' + $(this).html() + '</li>')
$(this).remove();
last_level = cur_level;
}else{
last_level = 0;
}
})
$('[style]', editor).removeAttr('style');
$('[align]', editor).removeAttr('align');
$('span', editor).replaceWith(function() {return $(this).contents();});
$('span:empty', editor).remove();
$("[class^='Mso']", editor).removeAttr('class');
$('p:empty', editor).remove();
}
return this.each(function() {
$(this).on('keyup', function(){
$('#src').text($('#editor').html());
var content = $(this).html();
if (/class="?Mso|style="[^"]*\bmso-|style='[^'']*\bmso-|w:WordDocument/i.test( content )) {
word_filter( $(this) );
}
});
});
};
})( jQuery )
$(function(){
$('#editor').msword_html_filter();
$('#src').text($('#editor').html());
})

I found it. Selector is not the problem. $('.editable_div').msword_html_filter(); is normally working.
But if the parent div is contenteditable = true, the script is not working for children divs. I still dont know why. Thanks you all.

Related

Javascript: How to check/compare if a String contains some tag?

I'd like to use pure JS to check if some String, the textareas .innerHTML = newContent below, contains some tag (h1in my case) at the beginning (=as first child). What would be the best way to do this?
Thanks!
function submitNewSectionContent(e) {
for (var i = 0; i < sections.length; i++)
let newHeading = document.getElementById('edit-title').value;
/* edit-title is text-input*/
let newContent = document.getElementById('edit-sectionText').innerHTML;
/* edit-sectionText is textarea */
if (newContent.indexOf('<h1>') > -1 && newContent.indexOf('<h1>') < 10) { /* <h1> is at beginning so replace with newHeading */
let toberemoved = newContent.match('<h1>.*<\/h1>');
newContent = newContent.replace(toberemoved[0], '').trim();
sections[i].innerHTML = '<h1>'+newHeading+'</h1>' + sections[i].innerHTML;
} else { /* newContent has no h1 as first child, so add h1 from newHeading */
sections[i].innerHTML = '<h1>'+newHeading+'</h1>' + newContent;
}
}
}

Problem with Regular expressions is they do not really work well with HTML. So Your best bet is to convert it to a DOM fragment and do the manipulations and convert it back. Only issue with this method really is you can lose formatting. There are libraries out there that can pretty print HTML.
function updateHeadline(txt) {
const ta = document.querySelector("textarea");
const data = ta.value; // read value, not innerHTML
const temp = document.createElement('div'); // temp div to hold html
temp.innerHTML = data; // set the html to the temp element
let firstChild = temp.firstElementChild // look at the dom
if (!firstChild || firstChild.tagName!=="H1") { // see if we have an h1
firstChild = document.createElement("h1") // if not create one
temp.prepend(firstChild) // add it to the front
}
firstChild.innerHTML = txt // set the new text of the h1
ta.value = temp.innerHTML // put the content back into the textarea
}
const btn = document.querySelector("button");
btn.addEventListener("click", function (e) {
e.preventDefault()
updateHeadline(document.querySelector("#text").value)
})
textarea {
width: 300px;
height: 200px;
}
<textarea>
<p>Some other text</p>
<p>Some more text</p>
</textarea>
<input value="foo" id="text"/>
<button>Set</button>

You could use regex like so, (updated based on comment)
if( /^\s*<h1>/gi.test(stringToTest) ) {
//logic here
}
It checks if the stringToTest begins with ^ tag
See here : https://regex101.com/r/vSo4sL/1

convert to dom, and parse dom
this portion of code makes it possible to treat a chain to retrieve titles placed in the H1 tag and (on the fly) treat the string of characters.
It's easily expandable for future processing : or tag processing or other ...!
commented code
<html>
<script>
var s="<H1>Hey Title</H1>\n Hello,\n other title <H1>Green!</H1>\n Ipsum dolore sit...";
console.log(s);
console.log("-------------------------");
var partialDoc = document.createElement( 'html' );
partialDoc.innerHTML = s;
var parsed='';
var titles=[];
treatment(partialDoc);
console.log("\n-------------------------\n");
console.log("parsed",parsed);
console.log("\n-------------------------\n");
console.log("s var contains "+titles.length+ " H1 tag");
console.log("titles "+titles);
function treatment(root) {
var child = root.firstChild;
while (child) {
// child.nodeName = H1 | H2 | P etc...
// child.nodeType = 1
// catch H1
if (child.nodeName=='H1') {
// append your title,
parsed+=" [H1 FOUND content= {"+child.innerText+"} H1])";
// or
// parsed+="<H1>"+child.innerText+"<H1>";
// add your own process here
// add this title in array
// or what you want...
titles.push(child.innerText);
// next part of document
child = child.nextSibling;
continue;
}
// capture other text than H1
if (child.nodeType==3) { // Node Type Text
parsed+=child.nodeValue;
}
if (child.nodeType==1) { // Node Type ELEMENT, : sub nodes...
treatment(child);
}
// continue the rest of doc
child = child.nextSibling;
}
}
</script>
</html>

One way you could do it is: Node.firstElementChild which will avoid giving child node as #text for white-spaces and Node.nodeName
let firstChild = document.getElementById('edit-sectionText').firstElementChild;
if(firstChild.nodeName === "H1"){
firstChild.innerHTML = "Replacement Value"
}
Note & Update: The earlier api that I had suggested Node.firstChild will not prevent white-spaces which gives #text node and comments as #comment node.
2nd Way: Node.children and picking the first child out of it should have a similar result to Node.firstElementChild.
let elem = document.getElementById('edit-sectionText');
if(elem){
let firstChild = elem.children[0];
}
Update based on comments: Using Dom Parser Interface
The interface allows to parse XML or HTML source from a string based on the mime type provided for its method parseFromString(string, mimeType)
It will give the top level #document node with parsed HTML from the string where if exists <h1> or <H1> at the beginning would be the first child of body and subsequently can be tested via tagName property.
Note: Takes care of preceding HTML comments and spaces at the beginning but a caveat is doesn't check fully closed tags ex: var s = \t <h1>I am a heading <h1> here the <h1> was never closed and in the result will two fully formed headings at the body with content : I am a heading and ""
let textAreaString = document.getElementById("edit-sectionText").value;
const domParser = new DOMParser();
const parsedDoc = domParser.parseFromString(textAreaString, "text/html");
if (parsedDoc.body.firstElementChild.tagName === "H1") {
//yes it starts with <h1> or <H1>
}

Highlight Ajax Response with Javascript

I'm trying to highlight a query inside a text coming from an ajax response, before constructing HTML with it and pasting that into the DOM. Right now I'm using this code snippet:
function highlightWords(line, word, htmltag) {
var tag = htmltag || ["<b>", "</b>"];
var regex = new RegExp('(' + preg_quote(word) + ')', 'gi');
return line.replace(regex, tag[0] + "$1" + tag[1]);
}
function preg_quote(str) {
return (str + '').replace(/([\\\.\+\*\?\[\^\]\$\(\)\{\}\=\!\<\>\|\:])/g, "\\$1");
}
However, this is not capeable of highlighting different words if the query is something like sit behind. It will only highlight the complete phrase and not the single words. It also doesn't care about HTML tags and that produces unpretty results if the query is span for example...
I've found various libraries which handle highlighting way better, like https://markjs.io/ or https://www.the-art-of-web.com/javascript/search-highlight/
Those libraries though always want to highlight content which is already present in the DOM.
My search gets an ajax response, which I then turn into HTML with JS and paste the complete HTMLString into a parent container using DOM7 (which is similar to jQuery). Therfor I would prefer to highlight the text before creating the HTMLString and pasting it in the DOM.
Any ideas?

I just make the highlight in the response of ajax request. It's works for me:
$.ajax({
url : url,
type : 'POST',
success: function(response) {
// Highlight
let term = 'word';
$context = $("#selector");
$context.show().unmark();
if (term){
$context.mark(term, {
done: function() {
$context.not(":has(mark)").hide();
}
});
}
}
});

Snippet style: Warning: this uses DOM7 as per Question
Overview: Instead of appending the whole text as HTML string to your #container,
Append the portions of normal text, as text, and the highlighted elements as elements, so you can style them at will.
var text // your ajax text response
var strQuery = 'sit behind' // your query string
var queryWords = strQuery.split(' ')
var textWords = text.split(' ')
var bufferNormalWords = []
textWords.forEach(function (word) {
if (queryWords.indexOf(word) != -1) { // found
var normalWords = bufferNormalWords.splice(0, buffer.length) // empty buffer
// Your DOM7 commands
$$('#container').add('span').text(normalWords.join(' ')) // normal text
$$('#container').add('span').css('color', 'red').text(word + ' ') // why not red
}
else bufferNormalWords.push(word)
})
Do not mess up with text becoming HTMLStrings, just set text, and create the necesary elements to style them as you want with your DOM7.

If your ajax response contains html, I don't think there's an easy way to get around creating DOM elements first. Below gets the job done, even in the case where span is in the query and the ajax results contain <span>
function highlightWords(line, word, htmltag) {
var words = word.split(/\s+/);
var tag = htmltag || ["<b>", "</b>"];
var root = document.createElement("div");
root.innerHTML = line;
root = _highlightWords(words, tag, root);
return root.innerHTML;
}
// Recursively search the created DOM element
function _highlightWords(words, htmlTag, el) {
var children = [];
el.childNodes.forEach(function(el) {
if (el.nodeType != 3) { // anything other than Text Type
var highlighted = _highlightWords(words, htmlTag, el);
children.push(highlighted);
} else {
var line = _highlight(el.textContent, words, htmlTag);
var span = document.createElement("span");
span.innerHTML = line;
children.push(span);
}
});
// Clear the html of the element, so the new children can be added
el.innerHTML = "";
children.forEach(function (c) { el.appendChild(c)});
return el;
}
// Find and highlight any of the words
function _highlight(line, words, htmlTag) {
words.forEach(function(singleWord) {
if (!!singleWord) {
singleWord = htmlEscape(singleWord);
line = line.replace(singleWord, htmlTag[0] + singleWord + htmlTag[1]);
}
});
return line;
}

I think you were on the right track using a library for that.
I have been using for that a great library named mark.js.
It works without dependencies or with jQuery.
The way that you can make it work.
Make the AJAX call.
Load the string to the DOM.
Call the Mark.js API on the content you have loaded.
Here's a code snippet:
document.addEventListener('DOMContentLoaded', getText);
function getText() {
const headline = document.getElementsByTagName("h1")[0];
const p = document.getElementsByTagName("p")[0];
fetch('https://jsonplaceholder.typicode.com/posts/1').
then(response => response.json()).
then(json => {
console.log(json);
headline.innerHTML = json.title;
p.innerHTML = json.body;
addMark('aut facere');
});
}
function addMark(keyword) {
var markInstance = new Mark(document.querySelector('.context'));
var options = {
separateWordSearch: true
};
markInstance.unmark({
done: function() {
markInstance.mark(keyword, options);
},
});
}
<script src="https://cdn.jsdelivr.net/mark.js/8.6.0/mark.min.js"></script>
<div class="context">
<h1></h1>
<p></p>
</div>

Automatically Highlight Specific Word in Browser

In the content scripts of my chrome extension I am trying to inject code that highlights a specific word in the page.
In this instance, I am viewing espn.com and would like to have all instances of 'bryant' highlighted in the text immediately as the page is loaded.
This is the current code I have customized after viewing several questions similar to mine:
var all = document.getElementsByTagName("*");
highlight_words('Bryant', all);
function highlight_words(keywords, element) {
if(keywords) {
var textNodes;
keywords = keywords.replace(/\W/g, '');
var str = keywords.split(" ");
$(str).each(function() {
var term = this;
var textNodes = $(element).contents().filter(function() { return this.nodeType === 3 });
textNodes.each(function() {
var content = $(this).text();
var regex = new RegExp(term, "gi");
content = content.replace(regex, '<span class="highlight">' + term + '</span>');
$(this).replaceWith(content);
});
});
}
}
In my jquery-ui.css I have the following code. I understand it does not highlight at this moment but I am just trying to get a proof of concept:
.highlight {
font-weight: bold;
}
At this time everything loads properly but no iteration of 'bryant' is read in bold.
Thanks!

The fastest way to do this is to define the what are you want to search for highlight, for example:
You have 3 parts on site, the navbar on left, the title on the top and the content.
Lets attach .foo class to article.
var list = document.getElementsByClassName("foo")
var search_word = ""
var contents = []
for(var i = 0; i < list.length; i++){
var contents = list[i].textContext.split(search_word)
list[i].textContext = contents.join('<span class="heighlight\">'+search_word+'</span>')
}
Hope it will help.
(The highlight is bound to elements that have .foo class)
some example: https://jsfiddle.net/Danielduel/0842qntu/2/

Make text into links outside of <a> tags only

Here's my issue. I made a function that resolves links in javascript, but the use-case I'm stuck with is that there may already be HTML in posts with links.
Users can not post true HTML, but moderators and administrators can, meaning I need to handle both cases.
Here's an example of your typical user post HTML:
<div class="teaser">
This is just your normal post http://google.com some other stuff
</div>
And administrator/moderator:
<div class="teaser">
<b>
THIS LINK
</b>
<br><br>
Supplemental reading: Link again
</div>
Normally, I'd use something like
function replaceURLWithHTMLLinks(text) {
var exp = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
return text.replace(exp,"<a href='$1' target='_blank'>$1</a>");
}
c.not('a').each(function() {
var html = $(this).html();
$(this).html(replaceURLWithHTMLLinks(html));
});
But this causes links to be parsed which exist inside of the href property. I need to be able to create links only when they are outside of tags, and it needs to be through all children as you'll notice that is the first child in a mod/admin post (if they so choose).
Mods and admins can put basically any HTML they desire in their posts, so the tag could be anywhere in the post hierarchy which is not at all consistent.
I could just not parse links on admin or mod posts, but sometimes some mods and admins use the proper HTML tags, and sometimes they don't, which is why I'd like to know the proper way of doing this.

Try this:
var exp = /^(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
$('.teaser').each(function() {
var i, words;
$this = $(this);
words = $this.html().split(' ');
for (i = 0; i < words.length; i++) {
if (exp.test(words[i])) {
words[i] = words[i].replace(exp, "<a href='$1' target='_blank'>$1</a>");
}
}
$this.html(words.join(' '));
});
Demo Link

I found the answer here it seems.
filterTeaserLinkContent: function(data) {
var exp = /\b((https?|ftps?|about|bitcoin|git|irc[s6]?):(\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/|magnet:\?(dn|x[lts]|as|kt|mt|tr)=)([^\s()<>]+|\([^\s()<>]+\))+(\([^\s()<>]+\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’])/g;
var nodes = data[0].childNodes;
for(var i = 0; i < nodes.length; i++) {
var n = nodes[i];
if(n.nodeType == n.TEXT_NODE || n.nodeName == 'BR') {
var g = n.textContent.match(exp);
while(g) {
var idx=n.textContent.indexOf(g[0]);
var pre=n.textContent.substring(0,idx);
var a=document.createElement("a");
if (!/^[a-z][\w-]+:/.test(g[0])) {
a.href = "http://" + g[0];
} else {
a.href = g[0];
}
a.innerText = g[0];
n.textContent = n.textContent.substring(idx+g[0].length);
n.parentElement.insertBefore(a,n);
g=n.textContent.match(exp);
}
} else {
Board.filterTeaserLinkContent($(n));
}
}
},
filterTeaserContent: function(data) {
// Jam into <div> so we can play with it
var c = $('<div>' + data + '</div>');
// Remove <wbr> tag which breaks links
c.find('wbr').each(function() {
$(this).remove();
});
// Re-parse the HTML after removing <wbr> or else the text nodes won't be joined
c = $('<div>' + c.html() + '</div>');
// I actually forget what this does, but fuck it. Shit.
c.not("div, s, span, a").each(function() {
var content = $(this).contents();
$(this).replaceWith(content);
});
Board.filterTeaserLinkContent(c);
// Remove images in post preview because they don't need to be here...
c.find('img').each(function() {
$(this).remove();
});
// Simplify line breaks
return c.html().replace(/<br ?\/?><br ?\/?>/g, "<br>");
},
This is for use in the 4chan API in case anyone was curious.

Manipulating DOM data without affecting the view

<!DOCTYPE html>
<html><body>
<p id="intro">Hello <em id="abcd">intro</em> World!</p>
<script type="text/javascript">
var txt=document.getElementById("intro").innerHTML;
var el = document.createElement("span");
el.innerHTML = txt;
var aa = el.getElementById("abcd").innerHTML;
alert( aa );
</script>
</body></html>
The above is a simple snippet. Actually I have an HTML editor and when the user saves the data I should save only the required content. Here I am getting the content of an element and manipulating it with DOM and pass the details to the server. This way I will not change the page content (user view remains the same) and he/she will continue editing the document.
The above is a simple example but in the real case I have to remove, change and move certain elements. The above code fails el.getElementById("abcd").innerHTML. Appreciate any pointers.

You can create a hidden iframe to manipulate all your changes, thus creating a separate DOM, then simply pull back the results you want.
var iframe;
if (document.createElement && (iframe = document.createElement('iframe'))) {
iframe.name = iframe.id = "externalDocument";
iframe.className = "hidden";
document.body.appendChild(iframe);
var externalDocument;
if (iframe.contentDocument) {
externalDocument = iframe.contentDocument;
} else if (iframe.contentWindow) {
externalDocument = iframe.contentWindow.document;
}
else if (window.frames[iframe.name]) {
externalDocument = window.frames[iframe.name].document;
}
if (externalDocument) {
externalDocument.open();
externalDocument.write('<html><body><\/body><\/html>');
externalDocument.close();
/* Run your manipulations here */
var txt = document.getElementById("intro").innerHTML;
var el = document.createElement("span");
el.innerHTML = txt;
/* Attach your objects to the externalDocument */
externalDocument.body.appendChild(el);
/* Reference the externalDocument to manipulate */
var aa = externalDocument.getElementById("abcd").innerHTML;
alert(aa);
}
/* Completed manipulation - Remove iFrame */
document.removeChild(iframe);
}
I have it working here:
http://jsfiddle.net/ucpvP/

Try using jQuery like given below.
function SaveData() //Your add function
{
var txt=$("#intro").html();
$(document).append("<span id='abcd'>" + txt+ "</span>");
var aa = $("#abcd").hmtl();
alert(aa);
}

You can use a DOM Element that is never appended to the DOM.
I use this 'cleanup' function:
function cleanup(str){
var tester = document.createElement('div'),
invalid, result;
tester.innerHTML = str;
//elements I don't allow
invalid = tester.querySelectorAll('script,object,iframe,style,hr,canvas');
// the cleanup (remove unwanted elements)
for (var i=0;i<invalid.length;(i+=1)){
invalid[i].parentNode.removeChild(invalid[i]);
}
result = tester.innerHTML;
tester = invalid = null;
//diacritics to html-encoded
return result.replace(/[\u0080-\u024F]/g,
function(a) {return '&#'+a.charCodeAt(0)+';';}
)
.replace(/%/g,'%25');
}
//usage:
cleanup(document.getElementById("intro").innerHTML);
You can extend the function with your own code to remove, change and move certain elements.

We Keep Coding

JavaScript is the programming language of the Web.

Paste MS Word as Text but keep some styling Jquery - javascript

I found it. Selector is not the problem. $('.editable_div').msword_html_filter(); is normally working. But if the parent div is contenteditable = true, the script is not working for children divs. I still dont know why. Thanks you all.

Related

Javascript: How to check/compare if a String contains some tag?

Highlight Ajax Response with Javascript

Automatically Highlight Specific Word in Browser

Make text into links outside of <a> tags only

Manipulating DOM data without affecting the view

Categories

Resources