求高手，URL匹配算法,该怎么解决

2012-03-22

求高手，URL匹配算法：一个URL匹配算法，用原URL（src）去匹配目的URL（dst），原URL中可以存在两个通配符，它们是“*”和“%”……

求高手，URL匹配算法
一个URL匹配算法，用原URL（src）去匹配目的URL（dst），原URL中可以存在两个通配符，它们是“*”和“%”，其中“*”可以匹配任何字符串（也可以理解成匹配多个任意字符，可以是0个），“%”可以匹配除“/”以外的任何字符串（也可以理解成匹配除"/"以外的多个任意字符，可以是0个）。
举例如下：

原URL 目的URL 结果
无通配符 "http://www.baidu.com/"， "http://www.baidu.com/" 匹配
"http://www.baidu.com/", "http://news.baidu.com/" 不匹配

有通配符 "http://www.*.com/", "http://www.baidu.com/" 匹配
"http://www.%.com/", "http://www.baidu.com/" 匹配

"http://www.baidu.com/*", "http://www.baidu.com/" 匹配
"http://www.baidu.com/*", "http://www.baidu.com/index.html" 匹配
"http://www.baidu.com/*", "http://www.baidu.com/path/index.html" 匹配
"http://www.baidu.com/%", "http://www.baidu.com/" 匹配
"http://www.baidu.com/%", "http://www.baidu.com/index.html" 匹配
"http://www.baidu.com/%", "http://www.baidu.com/path/index.html" 不匹配

[解决办法]

C/C++ code

/*
 * Minimal regular-expression matcher, excerpted from "Beautiful Code".
 *
 * Supported metacharacters:
 *   .   matches any single character
 *   ^   matches the beginning of the input string
 *   $   matches the end of the input string
 *   *   matches zero or more occurrences of the previous character
 */
#include <stdio.h>

int matchhere(const char *regexp, const char *text);

/*
 * matchstar: search for c*regexp at the beginning of text.
 *
 * Tries the shortest expansion of c* first and grows it one character at a
 * time. Returns 1 if some expansion lets the rest of regexp match, else 0.
 */
int matchstar(int c, const char *regexp, const char *text)
{
    do {
        /* a * matches zero or more instances */
        if (matchhere(regexp, text)) {
            return 1;
        }
    } while (*text != '\0' && (*text++ == c || c == '.'));
    return 0;
}

/*
 * matchhere: search for regexp at the beginning of text.
 *
 * Returns 1 if regexp matches a prefix of text (the whole of text when the
 * pattern ends in '$'), else 0.
 */
int matchhere(const char *regexp, const char *text)
{
    if (regexp[0] == '\0') {
        return 1;
    }
    /* regexp[0] is not NUL here, so reading regexp[1] is in bounds. */
    if (regexp[1] == '*') {
        return matchstar(regexp[0], regexp + 2, text);
    }
    /* '$' is an anchor only as the last pattern character. */
    if (regexp[0] == '$' && regexp[1] == '\0') {
        return *text == '\0';
    }
    if (*text != '\0' && (regexp[0] == '.' || regexp[0] == *text)) {
        return matchhere(regexp + 1, text + 1);
    }
    return 0;
}

/*
 * match: search for regexp anywhere in text.
 *
 * Returns 1 if the pattern matches at some position of text, else 0.
 * A leading '^' anchors the match to the start of text.
 */
int match(const char *regexp, const char *text)
{
    if (regexp[0] == '^') {
        return matchhere(regexp + 1, text);
    }
    do {
        /* must look even if string is empty */
        if (matchhere(regexp, text)) {
            return 1;
        }
    } while (*text++ != '\0');
    return 0;
}

/* Demo driver: prints the result of a few matching and non-matching cases. */
int main(void)
{
    printf("%d==match(abc ,abc)\n", match("abc", "abc"));
    printf("%d==match(^a  ,abc)\n", match("^a", "abc"));
    printf("%d==match(c$  ,abc)\n", match("c$", "abc"));
    printf("%d==match(a.c ,abc)\n", match("a.c", "abc"));
    printf("%d==match(a.*c,abc)\n", match("a.*c", "abc"));
    printf("-------------------\n");
    printf("%d==match(ABC ,abc)\n", match("ABC", "abc"));
    printf("%d==match(^B  ,abc)\n", match("^B", "abc"));
    printf("%d==match(A$  ,abc)\n", match("A$", "abc"));
    printf("%d==match(a..c,abc)\n", match("a..c", "abc"));
    printf("%d==match(a.*d,abc)\n", match("a.*d", "abc"));
    return 0;
}
/*
 * Expected output:
 * 1==match(abc ,abc)
 * 1==match(^a  ,abc)
 * 1==match(c$  ,abc)
 * 1==match(a.c ,abc)
 * 1==match(a.*c,abc)
 * -------------------
 * 0==match(ABC ,abc)
 * 0==match(^B  ,abc)
 * 0==match(A$  ,abc)
 * 0==match(a..c,abc)
 * 0==match(a.*d,abc)
 */
------解决方案--------------------  
/*
 * URL wildcard matcher (forum answer, C/C++ code).
 *
 * The pattern (src) may contain two wildcards:
 *   *   matches any run of characters, including the empty run
 *   %   matches any run of characters that contains no '/', including the
 *       empty run
 * Every other pattern character must match the target (dst) literally, and
 * the whole of dst must be consumed for the match to succeed.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Result flag written by isMatch(). It is only ever set to true, so the
 * caller must reset it to false before starting a new query.
 */
bool match = false;

/*
 * isMatch: match src[srcSub..] against dst[dstSub..], backtracking over
 * wildcard expansions.
 *
 * src, dst        pattern and target strings
 * srclen, dstlen  their lengths (strlen values); used as the scan bounds
 * srcSub, dstSub  current positions; pass 0, 0 for a top-level query
 *
 * Sets the global `match` to true when the remainder of src matches the
 * entire remainder of dst; otherwise leaves it untouched.
 */
void isMatch(const char *src, const char *dst, int srclen, int dstlen,
             int srcSub, int dstSub)
{
    /* A match has already been recorded; nothing is left to prove. */
    if (match) {
        return;
    }
    if (srcSub < 0 || dstSub < 0 || srcSub > srclen || dstSub > dstlen) {
        return;
    }

    /*
     * Consume literal pattern characters. The loop stops at a wildcard even
     * when dst holds the same character, so a literal '*' or '%' in dst
     * (e.g. percent-encoding) never suppresses the wildcard, and it never
     * steps past the end of either string.
     */
    while (srcSub < srclen && src[srcSub] != '*' && src[srcSub] != '%') {
        if (dstSub >= dstlen || src[srcSub] != dst[dstSub]) {
            return;
        }
        ++srcSub;
        ++dstSub;
    }

    /* Pattern exhausted: success only if the target is exhausted too. */
    if (srcSub == srclen) {
        if (dstSub == dstlen) {
            match = true;
        }
        return;
    }

    /*
     * src[srcSub] is a wildcard. Let it absorb i = 0, 1, 2, ... characters
     * of dst and try to match the rest of the pattern after each expansion.
     */
    bool stop_at_slash = (src[srcSub] == '%');
    for (int i = 0; dstSub + i <= dstlen; ++i) {
        /* '%' may not absorb a '/': stop once the last absorbed char is one. */
        if (stop_at_slash && i > 0 && dst[dstSub + i - 1] == '/') {
            break;
        }
        isMatch(src, dst, srclen, dstlen, srcSub + 1, dstSub + i);
        if (match) {
            return;
        }
    }
}

/* Demo driver: matches one sample pattern against one sample URL. */
int main(void)
{
    const char *src = "http://www.*.com/%";
    const char *dst = "http://www.baidu.com/index";

    match = false;
    isMatch(src, dst, (int)strlen(src), (int)strlen(dst), 0, 0);
    if (match) {
        printf("match\n");
    } else {
        printf("not match\n");
    }
    return 0;
}

热点排行

C语言

求高手，URL匹配算法,该怎么解决