javascript脚本压缩工具JSEncoder实现
1、算法原理:
从javascript脚本文件中提取单词,存入字典表中,这里使用|分割的字符串,然后将单词对应的序号(仿base64编码值)写入原来代码的地方,形成压缩后的js代码
2、压缩效果:
jquery-1.2.3.js原始文件大小95kb->[其他工具处理,去掉回车、注释等]jquery-1.2.3.min.js,大小53kb
=>本文工具压缩后:32kb
3、可选其他工具:
JSA 2.0 pre-alpha:http://sourceforge.net/project/showfiles.php?group_id=175776
packer:http://dean.edwards.name/packer/
4、下载(包含源代码在jar文件中)
2008.4 Ver:0.5 下载
5、源代码
java代码如下,写完代码之后才发现这是JSA(http://sourceforge.net/project/showfiles.php?group_id=175776)的压缩算法的再实现,不过好像作者没有开源,
本文算作是一种技术上的研究了。
通过对jquery-1.2.3.min.js http://code.google.com/p/jqueryjs/downloads/detail?name=jquery-1.2.3.min.js
进行压缩测试通过,压缩率>40%.
??1package?com.cngd.jstool;
??2
??3import?java.io.FileReader;
??4import?java.io.BufferedReader;
??5import?java.io.FileOutputStream;
??6import?java.io.IOException;
??7import?java.util.regex.Matcher;
??8import?java.util.regex.Pattern;
??9import?java.util.Vector;
?10import?java.text.DecimalFormat;
?11
/**
 * JSEncoder: a dictionary-based JavaScript compressor.
 * <p/>
 * Every word (a run of letters, digits, '_' or '$') found in the script is stored
 * once in a '|'-separated dictionary and replaced in the script by a short
 * base64-like code. The result is wrapped in a self-extracting
 * {@code eval(function(...){...}(...))} expression that rebuilds the script.
 * <p/>
 * After writing this code the author found that it re-implements the compression
 * algorithm of JSA (http://sourceforge.net/project/showfiles.php?group_id=175776).
 * Results reported for jquery-1.2.3.min.js:
 * -------------------------------
 * Original | JSEncoder | JSA-20071021 (2.0 pre-alpha) | jquery packer
 * -------------------------------
 * 53kb     | 32kb      | 29kb                         | 29kb
 * -------------------------------
 * JSA additionally shortens local variable names, so its output is smaller.
 * <p/>
 * Author: midea0978 (http://www.cnblogs.com/midea0978)
 * Date: 2008-4-18
 * Version:0.5
 */
public class JSEncoder {
    /** Alphabet of the base64-like codes; the character at index i encodes the value i. */
    public static final String ENCODE_BASE64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$";

    /** Matches one replaceable word; the same character class is handed to the JavaScript decoder. */
    private static final Pattern WORD = Pattern.compile("([\\w\\$]+)");

    /** When true, the dictionary and the intermediate strings are printed to standard output. */
    public boolean isDebug = false;

    /**
     * Compresses a JavaScript file and prints the size statistics to standard output.
     *
     * @param filename path of the JavaScript file to compress
     * @param offset   value added to every dictionary index before it is encoded
     *                 (documented as >= 0); different offsets shift the code table,
     *                 and smaller offsets give a better compression ratio
     * @return the self-extracting compressed script
     * @throws Exception if the file cannot be read or a code value is negative
     */
    public String encode(String filename, int offset) throws Exception {
        String jscript = readFileData(filename);
        int size = jscript.length();
        // The script ends up inside a single-quoted JavaScript string literal:
        // flatten it to one line, then escape backslashes and single quotes.
        jscript = jscript.replaceAll("\n", " ");
        jscript = jscript.replaceAll("\\\\", "\\\\\\\\");
        jscript = jscript.replaceAll("\\'", "\\\\\\'");

        Matcher m = WORD.matcher(jscript);
        // dict keeps the words in first-seen order; positions gives constant-time
        // index lookup instead of a linear scan per token. Fully qualified so the
        // file's import list does not have to change.
        Vector<String> dict = new Vector<String>();
        java.util.Map<String, Integer> positions = new java.util.HashMap<String, Integer>();
        // Matcher.appendReplacement requires a StringBuffer.
        StringBuffer encscript = new StringBuffer();

        debugInfo("=====编码字典对应表=====");
        while (m.find()) {
            String element = m.group(1);
            Integer known = positions.get(element);
            int index;
            if (known == null) {
                index = dict.size();
                dict.add(element);
                positions.put(element, index);
            } else {
                index = known;
            }
            debugInfo(index + "==>" + element);
            m.appendReplacement(encscript, Base64Encode(offset + index + 1));
        }
        m.appendTail(encscript);

        // Join with '|' without a trailing separator; an empty dictionary yields "".
        StringBuilder dicttab = new StringBuilder();
        for (String word : dict) {
            if (dicttab.length() > 0) {
                dicttab.append('|');
            }
            dicttab.append(word);
        }
        String dictstr = dicttab.toString();

        debugInfo("=====  编码字典结束  =====");
        debugInfo("Offset=" + offset + ",字典大小=" + dict.size());
        debugInfo("压缩后的代码:\n" + encscript.toString());
        debugInfo("字典字符串:\n" + dictstr);

        String res = formatCode(encscript.toString(), dictstr, dict.size(), offset);
        int packsize = res.length();
        DecimalFormat df = new DecimalFormat("######.0");
        System.out.println("\n原始文件大小:" + size + "\n压缩后文件大小:" + packsize);
        System.out.println("=================\n压缩比率:" + df.format((size - packsize) * 100.0 / size) + "%");
        return res;
    }

    /**
     * Reads a text file using the platform default charset.
     *
     * @param filename path of the file to read
     * @return the file content, each line terminated by '\n'
     * @throws IOException if the file cannot be opened or read
     */
    private String readFileData(String filename) throws IOException {
        BufferedReader in = new BufferedReader(new FileReader(filename));
        try {
            StringBuilder sb = new StringBuilder();
            String line;
            // readLine() returning null is the end-of-file signal; ready() is not.
            while ((line = in.readLine()) != null) {
                sb.append(line).append('\n');
            }
            return sb.toString();
        } finally {
            in.close();
        }
    }

    /** Prints {@code txt} to standard output when {@link #isDebug} is set. */
    private void debugInfo(String txt) {
        if (isDebug) {
            System.out.println(txt);
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args input file, output file and an optional integer offset (default 0)
     */
    public static void main(String[] args) {
        System.out.println("JSEncoder 0.5 by midea0978 2008.4");
        System.out.println("=====================================");
        System.out.println("http://www.cnblogs.com/midea0978\n");
        if (args.length < 2) {
            System.out.println("Usage:java JSEncoder.jar jsfile outputfile [offset].");
            System.exit(0);
        }
        try {
            System.out.println("输入文件: " + args[0]);
            System.out.println("输出文件: " + args[1]);
            JSEncoder util = new JSEncoder();
            int offset = args.length >= 3 ? Integer.parseInt(args[2]) : 0;
            String code = util.encode(args[0], offset);
            FileOutputStream fs = new FileOutputStream(args[1]);
            try {
                fs.write(code.getBytes());
            } finally {
                // Release the file handle even when the write fails.
                fs.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Encodes a number with the base64-like alphabet {@link #ENCODE_BASE64},
     * most significant 6-bit digit first.
     *
     * @param c the number to encode
     * @return the code, with '$' escaped as "\$" so that it can be passed to
     *         {@link Matcher#appendReplacement}
     * @throws Exception if {@code c} is negative
     */
    private String Base64Encode(int c) throws Exception {
        if (c < 0) {
            throw new Exception("Error:Offset必须>=0.");
        }
        if (c > 63) {
            return Base64Encode(c >> 6) + Base64Encode(c & 63);
        }
        // appendReplacement treats a bare '$' as a group reference, so escape it.
        return c == 63 ? "\\$" : String.valueOf(ENCODE_BASE64.charAt(c));
    }

    /**
     * Wraps the encoded script and its dictionary in the JavaScript decoder.
     *
     * @param enc    encoded script, already escaped for a single-quoted JavaScript string
     * @param dict   dictionary words joined with '|'
     * @param size   number of dictionary words
     * @param offset offset that was added to the dictionary indices
     * @return the complete self-extracting script
     */
    private String formatCode(String enc, String dict, int size, int offset) {
        StringBuilder str = new StringBuilder();
        str.append("/* Compressed by JSEncoder */\neval(function(E,I,A,D,J,K,L,H){function C(A){return A<62?String.fromCharCode(A+=A<26?65:A<52?71:-4):A<63?'_':A<64?'$':C(A>>6)+C(A&63)}while(A>0)K[C(D--)]=I[--A];function N(A){return K[A]==L[A]?A:K[A]}if(''.replace(/^/,String)){var M=E.match(J),B=M[0],F=E.split(J),G=0;if(E.indexOf(F[0]))F=[''].concat(F);do{H[A++]=F[G++];H[A++]=N(B)}while(B=M[G]);H[A++]=F[G]||'';return H.join('')}return E.replace(J,N)}(");
        str.append("'" + enc + "',");
        str.append("'" + dict + "'.split('|'),");
        str.append(size + "," + (size + offset) + ",/[\\w\\$]+/g, {}, {}, []))");
        return str.toString();
    }

}
147
??2
??3import?java.io.FileReader;
??4import?java.io.BufferedReader;
??5import?java.io.FileOutputStream;
??6import?java.io.IOException;
??7import?java.util.regex.Matcher;
??8import?java.util.regex.Pattern;
??9import?java.util.Vector;
?10import?java.text.DecimalFormat;
?11
/**
 * JSEncoder: a dictionary-based JavaScript compressor.
 * <p/>
 * Every word (a run of letters, digits, '_' or '$') found in the script is stored
 * once in a '|'-separated dictionary and replaced in the script by a short
 * base64-like code. The result is wrapped in a self-extracting
 * {@code eval(function(...){...}(...))} expression that rebuilds the script.
 * <p/>
 * After writing this code the author found that it re-implements the compression
 * algorithm of JSA (http://sourceforge.net/project/showfiles.php?group_id=175776).
 * Results reported for jquery-1.2.3.min.js:
 * -------------------------------
 * Original | JSEncoder | JSA-20071021 (2.0 pre-alpha) | jquery packer
 * -------------------------------
 * 53kb     | 32kb      | 29kb                         | 29kb
 * -------------------------------
 * JSA additionally shortens local variable names, so its output is smaller.
 * <p/>
 * Author: midea0978 (http://www.cnblogs.com/midea0978)
 * Date: 2008-4-18
 * Version:0.5
 */
public class JSEncoder {
    /** Alphabet of the base64-like codes; the character at index i encodes the value i. */
    public static final String ENCODE_BASE64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$";

    /** Matches one replaceable word; the same character class is handed to the JavaScript decoder. */
    private static final Pattern WORD = Pattern.compile("([\\w\\$]+)");

    /** When true, the dictionary and the intermediate strings are printed to standard output. */
    public boolean isDebug = false;

    /**
     * Compresses a JavaScript file and prints the size statistics to standard output.
     *
     * @param filename path of the JavaScript file to compress
     * @param offset   value added to every dictionary index before it is encoded
     *                 (documented as >= 0); different offsets shift the code table,
     *                 and smaller offsets give a better compression ratio
     * @return the self-extracting compressed script
     * @throws Exception if the file cannot be read or a code value is negative
     */
    public String encode(String filename, int offset) throws Exception {
        String jscript = readFileData(filename);
        int size = jscript.length();
        // The script ends up inside a single-quoted JavaScript string literal:
        // flatten it to one line, then escape backslashes and single quotes.
        jscript = jscript.replaceAll("\n", " ");
        jscript = jscript.replaceAll("\\\\", "\\\\\\\\");
        jscript = jscript.replaceAll("\\'", "\\\\\\'");

        Matcher m = WORD.matcher(jscript);
        // dict keeps the words in first-seen order; positions gives constant-time
        // index lookup instead of a linear scan per token. Fully qualified so the
        // file's import list does not have to change.
        Vector<String> dict = new Vector<String>();
        java.util.Map<String, Integer> positions = new java.util.HashMap<String, Integer>();
        // Matcher.appendReplacement requires a StringBuffer.
        StringBuffer encscript = new StringBuffer();

        debugInfo("=====编码字典对应表=====");
        while (m.find()) {
            String element = m.group(1);
            Integer known = positions.get(element);
            int index;
            if (known == null) {
                index = dict.size();
                dict.add(element);
                positions.put(element, index);
            } else {
                index = known;
            }
            debugInfo(index + "==>" + element);
            m.appendReplacement(encscript, Base64Encode(offset + index + 1));
        }
        m.appendTail(encscript);

        // Join with '|' without a trailing separator; an empty dictionary yields "".
        StringBuilder dicttab = new StringBuilder();
        for (String word : dict) {
            if (dicttab.length() > 0) {
                dicttab.append('|');
            }
            dicttab.append(word);
        }
        String dictstr = dicttab.toString();

        debugInfo("=====  编码字典结束  =====");
        debugInfo("Offset=" + offset + ",字典大小=" + dict.size());
        debugInfo("压缩后的代码:\n" + encscript.toString());
        debugInfo("字典字符串:\n" + dictstr);

        String res = formatCode(encscript.toString(), dictstr, dict.size(), offset);
        int packsize = res.length();
        DecimalFormat df = new DecimalFormat("######.0");
        System.out.println("\n原始文件大小:" + size + "\n压缩后文件大小:" + packsize);
        System.out.println("=================\n压缩比率:" + df.format((size - packsize) * 100.0 / size) + "%");
        return res;
    }

    /**
     * Reads a text file using the platform default charset.
     *
     * @param filename path of the file to read
     * @return the file content, each line terminated by '\n'
     * @throws IOException if the file cannot be opened or read
     */
    private String readFileData(String filename) throws IOException {
        BufferedReader in = new BufferedReader(new FileReader(filename));
        try {
            StringBuilder sb = new StringBuilder();
            String line;
            // readLine() returning null is the end-of-file signal; ready() is not.
            while ((line = in.readLine()) != null) {
                sb.append(line).append('\n');
            }
            return sb.toString();
        } finally {
            in.close();
        }
    }

    /** Prints {@code txt} to standard output when {@link #isDebug} is set. */
    private void debugInfo(String txt) {
        if (isDebug) {
            System.out.println(txt);
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args input file, output file and an optional integer offset (default 0)
     */
    public static void main(String[] args) {
        System.out.println("JSEncoder 0.5 by midea0978 2008.4");
        System.out.println("=====================================");
        System.out.println("http://www.cnblogs.com/midea0978\n");
        if (args.length < 2) {
            System.out.println("Usage:java JSEncoder.jar jsfile outputfile [offset].");
            System.exit(0);
        }
        try {
            System.out.println("输入文件: " + args[0]);
            System.out.println("输出文件: " + args[1]);
            JSEncoder util = new JSEncoder();
            int offset = args.length >= 3 ? Integer.parseInt(args[2]) : 0;
            String code = util.encode(args[0], offset);
            FileOutputStream fs = new FileOutputStream(args[1]);
            try {
                fs.write(code.getBytes());
            } finally {
                // Release the file handle even when the write fails.
                fs.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Encodes a number with the base64-like alphabet {@link #ENCODE_BASE64},
     * most significant 6-bit digit first.
     *
     * @param c the number to encode
     * @return the code, with '$' escaped as "\$" so that it can be passed to
     *         {@link Matcher#appendReplacement}
     * @throws Exception if {@code c} is negative
     */
    private String Base64Encode(int c) throws Exception {
        if (c < 0) {
            throw new Exception("Error:Offset必须>=0.");
        }
        if (c > 63) {
            return Base64Encode(c >> 6) + Base64Encode(c & 63);
        }
        // appendReplacement treats a bare '$' as a group reference, so escape it.
        return c == 63 ? "\\$" : String.valueOf(ENCODE_BASE64.charAt(c));
    }

    /**
     * Wraps the encoded script and its dictionary in the JavaScript decoder.
     *
     * @param enc    encoded script, already escaped for a single-quoted JavaScript string
     * @param dict   dictionary words joined with '|'
     * @param size   number of dictionary words
     * @param offset offset that was added to the dictionary indices
     * @return the complete self-extracting script
     */
    private String formatCode(String enc, String dict, int size, int offset) {
        StringBuilder str = new StringBuilder();
        str.append("/* Compressed by JSEncoder */\neval(function(E,I,A,D,J,K,L,H){function C(A){return A<62?String.fromCharCode(A+=A<26?65:A<52?71:-4):A<63?'_':A<64?'$':C(A>>6)+C(A&63)}while(A>0)K[C(D--)]=I[--A];function N(A){return K[A]==L[A]?A:K[A]}if(''.replace(/^/,String)){var M=E.match(J),B=M[0],F=E.split(J),G=0;if(E.indexOf(F[0]))F=[''].concat(F);do{H[A++]=F[G++];H[A++]=N(B)}while(B=M[G]);H[A++]=F[G]||'';return H.join('')}return E.replace(J,N)}(");
        str.append("'" + enc + "',");
        str.append("'" + dict + "'.split('|'),");
        str.append(size + "," + (size + offset) + ",/[\\w\\$]+/g, {}, {}, []))");
        return str.toString();
    }

}
147