- Forums - Reply - Statistics - Sign Up - Search -

The Wyzz WYSIWYG Editor / Coding Discussion / Clean up MS word shit
Author Message
Fia
Member
# Posted: 12 Mar 2007 14:39


Hi!

I'm trying to clean up all the shit that MS Word does to your code, and I found this:

/**
 * Cleans markup inserted by Microsoft Word out of the editor document.
 *
 * Reads the HTML of `oW.document.body`, filters it and writes it back, then
 * calls `syncTextarea()`. Relies on names defined outside this function:
 * `oW`, `gentleClean`, `t`, `confirm` and `syncTextarea`.
 *
 * `gentleClean` selects the cleaning mode:
 *   - falsy    : additionally strip SPAN/DEL/INS/U/DIR tags and every
 *                CLASS/STYLE attribute ("vicious" cleaning);
 *   - "ask"    : ask the user through confirm() whether to do so;
 *   - otherwise: only the Word-specific clean-up is performed.
 */
function cleanUp(){
  // Drops pixel-valued WIDTH/HEIGHT attributes from a single opening tag.
  // Percentage values (e.g. width="50%") are kept on purpose.
  function stripFixedSize(tag){
    return tag.replace(/\s+(?:WIDTH|HEIGHT)\s*=\s*(?:"\d+"|'\d+'|\d+(?![\w%.]))/gi, "");
  }

  oW.document.execCommand("removeformat", false, null);
  var h = oW.document.body.innerHTML;
  var vicious = false;
  if (gentleClean === "ask") vicious = confirm(t('Remove all styles and classes?'));
  if (!gentleClean || vicious){
    // \b after the tag name keeps <ul>, <insert...> etc. from being treated
    // as <u> / <ins>.
    h = h.replace(/<\/?(SPAN|DEL|INS|U|DIR)\b[^>]*>/gi, "")
      .replace(/\b(CLASS|STYLE)=\"[^\"]*\"/gi, "")
      .replace(/\b(CLASS|STYLE)=\w+/gi, "");
  }
  h = h.replace(/<\/?(\?XML|ST1|SHAPE|V:|O:|F:|F |PATH|LOCK|IMAGEDATA|STROKE|FORMULAS)[^>]*>/gi, "")
    .replace(/\bCLASS=\"?MSO\w*\"?/gi, "")
    .replace(/[\u2013]/g, '-') // en dash
    .replace(/[\u2018\u2019]/g, "'") // single smart quotes
    .replace(/[\u201C\u201D]/g, '"') // double smart quotes
    .replace(/align="?justify"?/gi, "") // justify sends some browsers mad
    // No fixed-size tables: handled tag by tag so that text content and
    // neighbouring tags are never touched.
    .replace(/<(?:TABLE|TD|TH|COL)\b[^>]*>/gi, stripFixedSize)
    .replace(/<([^>]+)>\s*<\/\1>/gi, ""); // empty tag
  oW.document.body.innerHTML = h;
  syncTextarea();
}

I tried to implement that in the Wyzz code but couldn't get it to work. Is anybody good at this JS coding who could help me?

Fia

RGaba
Member
# Posted: 27 Dec 2009 04:26


I just found this post and I'm trying to do the exact same thing. I haven't really dug too far into it yet, but I was just wondering if anybody has been able to accomplish this? I'm looking for a way to clean all the MS tags, on paste (via Control + V or right click paste) into the text area.

This can be an AJAX call or anything (preferably on the client). But right now, I have a lot of users who are pasting the text from a Word document directly into the text area and then all this other garbage gets inserted.

RGaba
Member
# Posted: 9 Jan 2010 18:21


Well, I wasn't able to get this exact script to work. Instead, I have a function that gets called onclick of the save button. This then cleans all the Word garbage and leaves the tags created by Wyzz. I had to make a lot of changes to the original script, but it can be found at:

www.1stclassmedia.co.uk/developers/clean-ms-word-formatting.php

The function I'm using (which works with Wyzz) is....

/**
 * Strips Microsoft Word markup from the HTML held in a Wyzz editor.
 *
 * The editor is looked up as the element with id "wysiwyg" + fieldId; the HTML
 * of its contentWindow document body is cleaned and written back both to that
 * body and to the `value` of the element whose id is fieldId.
 *
 * @param {string} fieldId id of the field backing the editor.
 */
function CleanWordHTML( fieldId ){
  var editorBody = document.getElementById("wysiwyg" + fieldId).contentWindow.document.body;
  var str = editorBody.innerHTML;
  var previous;

  // HTML comments and their contents (<!-- ... -->).
  str = str.replace(/<!(?:--[\s\S]*?--\s*)?>\s*/g, '');

  // Word's <o:p> paragraph markers.
  str = str.replace(/<o:p>\s*<\/o:p>/gi, "");
  str = str.replace(/<o:p>.*?<\/o:p>/gi, "&nbsp;");

  // Word-specific inline style declarations.
  str = str.replace(/\s*mso-[^:]+:[^;"]+;?/gi, "");
  str = str.replace(/\s*MARGIN: 0cm 0cm 0pt\s*;/gi, "");
  str = str.replace(/\s*MARGIN: 0cm 0cm 0pt\s*"/gi, "\"");
  str = str.replace(/\s*TEXT-INDENT: 0cm\s*;/gi, "");
  str = str.replace(/\s*TEXT-INDENT: 0cm\s*"/gi, "\"");
  str = str.replace(/\s*TEXT-ALIGN: [^\s;]+;?"/gi, "\"");
  str = str.replace(/\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, "\"");
  str = str.replace(/\s*FONT-VARIANT: [^\s;]+;?"/gi, "\"");
  str = str.replace(/\s*tab-stops:[^;"]*;?/gi, "");
  str = str.replace(/\s*tab-stops:[^"]*/gi, "");

  // Font faces, class and lang attributes.
  str = str.replace(/\s*face="[^"]*"/gi, "");
  str = str.replace(/\s*face=[^ >]*/gi, "");
  str = str.replace(/\s*FONT-FAMILY:[^;"]*;?/gi, "");
  str = str.replace(/<(\w[^>]*) class=([^ |>]*)([^>]*)/gi, "<$1$3");
  str = str.replace(/<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, "<$1$3");

  // XML declarations and namespaced tags (<o:...>, <v:...>, ...).
  str = str.replace(/<\\?\?xml[^>]*>/gi, '');
  str = str.replace(/<\/?\w+:[^>]*>/gi, "");

  // Headings: empty ones vanish, opening tags are dropped and closing tags
  // become line breaks (change '<br>' to '' to take those breaks out).
  str = str.replace(/<H\d>\s*<\/H\d>/gi, '');
  str = str.replace(/<H[1-6][^>]*>/gi, '');
  str = str.replace(/<\/H\d>/gi, '<br>');

  // <link> and <meta> tags that come along with a Word paste.
  str = str.replace(/<link [^>]+>/gi, '');
  str = str.replace(/<meta\b[^>]*>/gi, '');

  // Remove empty elements; repeat until nothing changes so that any depth of
  // nested empty elements collapses.
  do {
    previous = str;
    str = str.replace(/<([^\s>]+)[^>]*>\s*<\/\1>/g, '');
  } while (str !== previous);

  // Turn <p> into <div>. The lookahead restricts the match to real <p> tags
  // (not <pre>, <param>, ...); opening and closing tags are converted
  // independently so paragraphs spanning several lines are handled too.
  str = str.replace(/<(\/?)p(?=[\s\/>])([^>]*)>/gi, "<$1div$2>");

  editorBody.innerHTML = str; // Sets Wyzz editor to new string
  document.getElementById(fieldId).value = str; // Sets hidden field for Wyzz to new string
}

Stephen Price
Member
# Posted: 8 Feb 2010 21:01


That code doesn't seem to work for me. It leaves the following code at the top of the text area:

<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/><meta content="Word.Document" name="ProgId"/><meta content="Microsoft Word 12" name="Generator"/><meta content="Microsoft Word 12" name="Originator"/>

It also strips all the formatting selected in the Wyzz area

RGaba
Member
# Posted: 15 Feb 2010 03:43


It shouldn't remove all the formatting selected in the Wyzz area, but looking back at it, I see where it could remove some of the formatting. Which formatting do you see it removing?

Your answer

Bold Style  Italic Style  Underlined Style  Image Link  URL Link  Disable BB codes *What's that?

 » Name  » Password 
 Only registered users can post here. Enter your login/password correctly before posting a message, or register first.
 

Powered by miniBB forum software © 2001-2010