问题:有如下一段字符串,需要提交到后台做保存。为了提高传输速率,降低文件大小,需要将该字符串进行压缩。
"25a133ad,25a133,25a133,25a133,25a133au,230a105ad,230a105,230a105,240a105,259a105,274a105,303a105,323a105,
337a105,345a105,347a105,348a105,348a105,348a105,348a105au,274a128ad,274a128,273a135,270a160,268a179,265a196,
263a207,261a215,261a217,261a217,261a217,261a217,261a217,261a217,261a217au,305a128ad,305a128,308a136,316a159,
322a181,325a199,327a206,327a206,327a206,327a206au,232a154ad,233a154,247a155,266a156,289a156,310a155,324a155,
335a155,339a155,339a155,339a155,339a155,339a155,339a155,339a155au,44a137ad,44a137,44a137,44a137au,419a130ad,
419a130,420a131,428a141,435a146,442a152,445a156,453a162,459a164,462a165,462a165,462a166,461a165,457a160,448a149,
445a146,444a145,444a145,443a145,443a146,445a152,449a158,455a159,457a159,465a156,474a151,478a150,481a148,482a148,
482a148,482a152,481a166,481a171,481a171,482a170,484a159,487a152,489a150,489a149,489a149,489a149,489a149,489a149,
489a149,489a149,489a149,489a149,489a149,489a149,489a149au,35a285ad,35a285,35a285,35a285,35a285au,560a138ad,
560a138,568a148,577a159,584a168,591a175,598a182,599a183,599a183,599a183,598a180"
利用无损压缩算法,将字符串进行压缩,非常耗时间,as2实现起来比较勉强,是不是可以另寻捷径呢?
突破口:Flash中的字符串都是用Unicode编码的,每个字符占用2个字节(16bit),例如“ABC”的Unicode编码是0x0041,0x0042,0x0043。英文和标点符号占用了0x0000~0x007F这128个位置(实际上前面还有保留空位)。再观察一下我们要处理的数据,只有0~9以及“a”“u”“d”“w”“.”“,”,一共是16种符号,恰好可以用4bit表示一种符号,于是一个16bit的字符就能装下4个符号。假如建立一个字符字典的话,可以写成这样:dic=["0","1",......,"w"],这样一来,可以根据数组下标来获取任意字符,例如要获取"3",就是dic[3]。
有了自定义的字符字典,就开始字符转换啦:
1. 先把原始字符串的逗号都去掉,然后变成四个一组:25a1 33ad ,25a……一直到最后,不足四位的用字典的第一个字符补齐,并记住补了几个字符su_num。(注:下面的源代码实际上没有去掉逗号,而是把逗号当作字典中的一个符号直接编码,上面的分组示例也保留了逗号。)
2. 写一个函数getIDFromDic(_str:String):Number,来查找每个字符对应的下标。
3. 将每组字符串转化成一个unicode字符:String.fromCharCode(getIDFromDic("2")<<12 | getIDFromDic("5")<<8 |getIDFromDic("a")<<4 |getIDFromDic("1"))
4. 将转化后的字符依次拼接起来,形成新的字符串。
5. 在这个新字符串的最前端或者最后端加上su_num.toString()。
通过这种简单的转化,每4个字符变成1个字符,字符串的长度(字符数)就减少了约75%,看起来非常了不起。
局限性:
一是待压缩的字符串所含的字符种类要在16以内,否则效果就不理想了。
二是由于使用了String.fromCharCode(),假如出现连续4个“0”的话,会出现一个空字符串,导致无法正确还原。
执行效率:
由于频繁使用数组查询,效率不高,可以参考第二段代码进行优化。
源代码1,使用字典查询进行压缩:
// Symbol table for the packer: a symbol's array index (0-15) is its 4-bit code.
var dic:Array = [
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
    ".", ",", "a", "u", "d", "w"
];
/**
 * Looks up the position of a symbol in the dic array.
 *
 * @param _str  the single-character symbol to search for
 * @return the index of the first dic entry equal to _str, or null when no entry matches
 */
function getIDFromDic(_str:String):Number {
    var idx:Number = 0;
    var total:Number = dic.length;
    // Plain linear scan; the first match wins.
    while (idx<total) {
        if (_str == dic[idx]) {
            return idx;
        }
        idx++;
    }
    return null;
}
/**
 * Packs a string of dictionary symbols into 16-bit characters, four symbols per character.
 *
 * The input is padded with "0" until its length is a multiple of four. The number of
 * padding symbols (0-3) is written as the first character of the result so that
 * uncoding() can strip the padding again.
 *
 * Known limitation (noted by the original author): a group of four "0" symbols
 * produces char code 0, which cannot be restored reliably.
 *
 * @param _str  text to compress; every character is looked up with getIDFromDic()
 * @return the pad-count digit followed by one packed character per four input symbols
 */
function coding(_str:String):String {
    var ss:Number = 0;
    while (_str.length%4) {
        _str += "0";
        ss++;
    }
    var out_str:String = String(ss);
    var len:Number = _str.length;
    // The counter is declared with var so it stays local to this call. Left undeclared,
    // the assignment would create or overwrite a variable named "i" outside the function.
    for (var i:Number = 0; i<len; i += 4) {
        // Each symbol index occupies one nibble, most significant nibble first.
        var hi:Number = getIDFromDic(_str.charAt(i)) << 12;
        var midHi:Number = getIDFromDic(_str.charAt(i+1)) << 8;
        var midLo:Number = getIDFromDic(_str.charAt(i+2)) << 4;
        var lo:Number = getIDFromDic(_str.charAt(i+3));
        out_str += String.fromCharCode(hi | midHi | midLo | lo);
    }
    return out_str;
}
/**
 * Restores a string produced by coding().
 *
 * The first character of the input is read as the number of padding symbols that
 * were appended before packing; every following character is split into four
 * nibbles and each nibble is mapped back to a symbol through dic.
 *
 * @param _str  packed string: pad-count digit followed by the packed characters
 * @return the unpacked text with the trailing padding symbols removed
 */
function uncoding(_str:String):String {
    var out_str:String = "";
    var len:Number = _str.length;
    // Pad count written by the encoder at position 0.
    var ss:Number = Number(_str.charAt(0));
    // The counter is declared with var so it stays local to this call. Left undeclared,
    // the assignment would create or overwrite a variable named "i" outside the function.
    for (var i:Number = 1; i<len; i++) {
        var n:Number = _str.charCodeAt(i);
        // Most significant nibble holds the first symbol of the group.
        out_str += dic[(n & 0xF000) >> 12]+dic[(n & 0x0F00) >> 8]+dic[(n & 0x00F0) >> 4]+dic[(n & 0x000F)];
    }
    // Drop the symbols that were only added to fill the last group.
    out_str = out_str.substr(0, out_str.length-ss);
    return out_str;
}
// Sample input for the round-trip demo below; it is passed to coding() and later compared with the restored text.
// NOTE(review): the literal is wrapped across several lines here, presumably by the page layout.
// ActionScript string literals cannot contain raw line breaks, so join it into one line before compiling - TODO confirm.
var str:String = "25a133ad,25a133,25a133,25a133,25a133au,230a105ad,230a105,230a105,240a105,259a105,274a105,303a105,
323a105,337a105,345a105,347a105,348a105,348a105,348a105,348a105au,274a128ad,274a128,273a135,270a160,268a179,
265a196,263a207,261a215,261a217,261a217,261a217,261a217,261a217,261a217,261a217au,305a128ad,305a128,308a136,
316a159,322a181,325a199,327a206,327a206,327a206,327a206au,232a154ad,233a154,247a155,266a156,289a156,310a155,
324a155,335a155,339a155,339a155,339a155,339a155,339a155,339a155,339a155au,44a137ad,44a137,44a137,44a137au,
419a130ad,419a130,420a131,428a141,435a146,442a152,445a156,453a162,459a164,462a165,462a165,462a166,461a165,457a160,
448a149,445a146,444a145,444a145,443a145,443a146,445a152,449a158,455a159,457a159,465a156,474a151,478a150,481a148,
482a148,482a148,482a152,481a166,481a171,481a171,482a170,484a159,487a152,489a150,489a149,489a149,489a149,489a149,
489a149,489a149,489a149,489a149,489a149,489a149,489a149,489a149au,35a285ad,35a285,35a285,35a285,35a285au,560a138ad,
560a138,568a148,577a159,584a168,591a175,598a182,599a183,599a183,599a183,598a180";
// Round-trip demo: report the input size, then time compression and decompression.
trace("源字符串(length="+str.length+"):");

// Compression pass.
var s_time:Number = getTimer();
var code_str:String = coding(str);
var d_time:Number = getTimer()-s_time;
trace("压缩耗时:"+d_time+"ms");
trace("压缩后字符串(length="+code_str.length+")");

// Decompression pass; the two timer variables are reused.
s_time = getTimer();
var uncode_str:String = uncoding(code_str);
d_time = getTimer()-s_time;
trace("解压耗时:"+d_time+"ms");
// The restored text itself is not printed, only its length.
trace("还原后字符串(length="+uncode_str.length+"):");
// Check that the round trip reproduced the input exactly.
trace("还原是否正确:"+(uncode_str == str));
源码2:使用字典对象进行压缩,执行效率是前者的两倍
// Note from the original author: this packing method has a limitation - a group "0000"
// is dropped, and no workaround has been found yet.
// Note from the original author: with the quick packer the input may only contain digits and letters.
var dic:Array = [
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
    ".", ",", "a", "u", "d", "w"
];
// Reverse lookup table: key "char" + symbol maps to the symbol's index in dic.
var quick_dic:Object = {};
var i = 0;
while (i<dic.length) {
    quick_dic["char"+dic[i]] = i;
    i++;
}
/**
 * Packs a string of dictionary symbols into 16-bit characters, four symbols per
 * character, using the quick_dic lookup object instead of an array search.
 *
 * The input is padded with "0" until its length is a multiple of four. The number of
 * padding symbols (0-3) is written as the first character of the result so that
 * uncoding() can strip the padding again.
 *
 * Known limitation (noted by the original author): a group of four "0" symbols
 * cannot be restored.
 *
 * @param _str  text to compress; every character is looked up as quick_dic["char" + character]
 * @return the pad-count digit followed by one packed character per four input symbols
 */
function quickCoding(_str:String):String {
    var ss:Number = 0;
    while (_str.length%4) {
        _str += "0";
        ss++;
    }
    var out_str:String = String(ss);
    var len:Number = _str.length;
    // The counter is declared with var so it stays local to this call. Left undeclared,
    // the assignment would overwrite the variable "i" used by the quick_dic setup loop.
    for (var i:Number = 0; i<len; i += 4) {
        // Each symbol index occupies one nibble, most significant nibble first.
        var hi:Number = quick_dic["char"+_str.charAt(i)] << 12;
        var midHi:Number = quick_dic["char"+_str.charAt(i+1)] << 8;
        var midLo:Number = quick_dic["char"+_str.charAt(i+2)] << 4;
        var lo:Number = quick_dic["char"+_str.charAt(i+3)];
        out_str += String.fromCharCode(hi | midHi | midLo | lo);
    }
    return out_str;
}
/**
 * Restores a string produced by quickCoding().
 *
 * The first character of the input is read as the number of padding symbols that
 * were appended before packing; every following character is split into four
 * nibbles and each nibble is mapped back to a symbol through dic.
 *
 * @param _str  packed string: pad-count digit followed by the packed characters
 * @return the unpacked text with the trailing padding symbols removed
 */
function uncoding(_str:String):String {
    var out_str:String = "";
    var len:Number = _str.length;
    // Pad count written by the encoder at position 0.
    var ss:Number = Number(_str.charAt(0));
    // The counter is declared with var so it stays local to this call. Left undeclared,
    // the assignment would overwrite the variable "i" used by the quick_dic setup loop.
    for (var i:Number = 1; i<len; i++) {
        var n:Number = _str.charCodeAt(i);
        // Most significant nibble holds the first symbol of the group.
        out_str += dic[(n & 0xF000) >> 12]+dic[(n & 0x0F00) >> 8]+dic[(n & 0x00F0) >> 4]+dic[(n & 0x000F)];
    }
    // Drop the symbols that were only added to fill the last group.
    out_str = out_str.substr(0, out_str.length-ss);
    return out_str;
}
// Sample input for the round-trip demo below; it is passed to quickCoding() and later compared with the restored text.
// NOTE(review): the literal is wrapped across several lines here, presumably by the page layout.
// ActionScript string literals cannot contain raw line breaks, so join it into one line before compiling - TODO confirm.
var str:String = "25a133ad,25a133,25a133,25a133,25a133au,230a105ad,230a105,230a105,240a105,259a105,274a105,303a105,
323a105,337a105,345a105,347a105,348a105,348a105,348a105,348a105au,274a128ad,274a128,273a135,270a160,268a179,
265a196,263a207,261a215,261a217,261a217,261a217,261a217,261a217,261a217,261a217au,305a128ad,305a128,308a136,
316a159,322a181,325a199,327a206,327a206,327a206,327a206au,232a154ad,233a154,247a155,266a156,289a156,310a155,
324a155,335a155,339a155,339a155,339a155,339a155,339a155,339a155,339a155au,44a137ad,44a137,44a137,44a137au,
419a130ad,419a130,420a131,428a141,435a146,442a152,445a156,453a162,459a164,462a165,462a165,462a166,461a165,
457a160,448a149,445a146,444a145,444a145,443a145,443a146,445a152,449a158,455a159,457a159,465a156,474a151,478a150,
481a148,482a148,482a148,482a152,481a166,481a171,481a171,482a170,484a159,487a152,489a150,489a149,489a149,
489a149,489a149,489a149,489a149,489a149,489a149,489a149,489a149,489a149,489a149au,35a285ad,35a285,35a285,35a285,
35a285au,560a138ad,560a138,568a148,577a159,584a168,591a175,598a182,599a183,599a183,599a183,598a180";
// Round-trip demo for the quick packer: time compression, then decompression.

// Compression pass.
var s_time:Number = getTimer();
var q_code_str:String = quickCoding(str);
var d_time:Number = getTimer()-s_time;
trace("快速压缩耗时:"+d_time+"ms");
trace("压缩后字符串(length="+q_code_str.length+")");

// Decompression pass; the two timer variables are reused.
s_time = getTimer();
var uncode_str:String = uncoding(q_code_str);
d_time = getTimer()-s_time;
trace("解压耗时:"+d_time+"ms");
// The restored text itself is not printed, only its length.
trace("还原后字符串(length="+uncode_str.length+"):");
// Check that the round trip reproduced the input exactly.
trace("还原是否正确:"+(uncode_str == str));