[方便查看] 新浪微博爬虫系列之短链接和Mid的转换 base62
新浪微博中每条微博的地址格式都是:weibo.com/uid/url
其中末尾的 url 是根据该条微博的数字 id(即 mid)计算出来的短链接。
其实也就是 base62 编码:mid 从右往左每 7 位十进制数字为一组,对应短链接中从右往左的每 4 位 62 进制字符;除最左边一组外,其余各组不足位数时在左侧补 0。
下面提供算法
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;

namespace Spider.Common
{
    /// <summary>
    /// Converts between a Sina Weibo numeric post id ("mid") and the base-62
    /// short code that appears at the end of a post URL.
    /// </summary>
    /// <remarks>
    /// The mid is cut, from the right, into groups of 7 decimal digits and the
    /// short code is cut, from the right, into groups of 4 base-62 characters.
    /// Groups correspond one to one. Every group except the leftmost one is
    /// left-padded with '0' to its full width so that the two directions are
    /// inverses of each other.
    /// </remarks>
    public class SinaBase62Convert
    {
        /// <summary>Base-62 digits in value order: 0-9, then a-z, then A-Z.</summary>
        private const string Alphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

        /// <summary>Number of base-62 characters per short-code group.</summary>
        private const int UrlGroupLength = 4;

        /// <summary>Number of decimal digits per mid group.</summary>
        private const int MidGroupLength = 7;

        /// <summary>
        /// Converts a base-62 short code into the decimal mid string.
        /// </summary>
        /// <param name="url">The short code only (not a full URL).</param>
        /// <returns>The decimal mid, or an empty string when <paramref name="url"/> is empty.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        /// <exception cref="FormatException"><paramref name="url"/> contains a character outside 0-9, a-z, A-Z.</exception>
        public static String UrlToMid(String url)
        {
            if (url == null)
            {
                throw new ArgumentNullException("url");
            }

            StringBuilder mid = new StringBuilder();

            // Walk the groups right to left; only the leftmost group may be shorter than 4 characters.
            for (int end = url.Length; end > 0; end -= UrlGroupLength)
            {
                int start = Math.Max(0, end - UrlGroupLength);
                int value = DecodeBase62(url.Substring(start, end - start));
                string digits = value.ToString(CultureInfo.InvariantCulture);

                // Inner groups carry significant leading zeros; the leftmost group does not.
                if (start > 0)
                {
                    digits = digits.PadLeft(MidGroupLength, '0');
                }

                mid.Insert(0, digits);
            }

            return mid.ToString();
        }

        /// <summary>
        /// Converts a decimal mid string into the base-62 short code.
        /// </summary>
        /// <param name="mid">The mid as a string of ASCII decimal digits.</param>
        /// <returns>
        /// The short code. An empty string is returned when <paramref name="mid"/> is null, empty,
        /// or contains anything other than ASCII digits; this method does not throw for bad input.
        /// </returns>
        public static String MidToUrl(String mid)
        {
            if (string.IsNullOrEmpty(mid) || !IsAllAsciiDigits(mid))
            {
                return "";
            }

            StringBuilder url = new StringBuilder();

            // Walk the groups right to left; only the leftmost group may be shorter than 7 digits.
            for (int end = mid.Length; end > 0; end -= MidGroupLength)
            {
                int start = Math.Max(0, end - MidGroupLength);
                int value = int.Parse(mid.Substring(start, end - start), NumberStyles.None, CultureInfo.InvariantCulture);
                string code = EncodeBase62(value);

                // Without this padding a group such as 0175648 ("JH2") would lose its
                // leading '0' and the short code could no longer be decoded correctly.
                if (start > 0)
                {
                    code = code.PadLeft(UrlGroupLength, '0');
                }

                url.Insert(0, code);
            }

            return url.ToString();
        }

        /// <summary>Returns true when every character of <paramref name="text"/> is in '0'..'9'.</summary>
        private static bool IsAllAsciiDigits(string text)
        {
            foreach (char c in text)
            {
                if (c < '0' || c > '9')
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Decodes a base-62 group into its integer value.
        /// Callers pass at most 4 characters, so the result (below 62^4) fits in an int.
        /// </summary>
        /// <exception cref="FormatException">A character is not a base-62 digit.</exception>
        private static int DecodeBase62(string group)
        {
            int value = 0;
            foreach (char c in group)
            {
                int digit = Alphabet.IndexOf(c);
                if (digit < 0)
                {
                    throw new FormatException("Invalid base-62 character '" + c + "'.");
                }

                value = value * Alphabet.Length + digit;
            }

            return value;
        }

        /// <summary>
        /// Encodes a non-negative integer as base-62 without padding; zero encodes as "0".
        /// </summary>
        private static string EncodeBase62(int value)
        {
            if (value == 0)
            {
                return "0";
            }

            StringBuilder code = new StringBuilder();
            while (value > 0)
            {
                code.Insert(0, Alphabet[value % Alphabet.Length]);
                value /= Alphabet.Length;
            }

            return code.ToString();
        }
    }
}