URL正则表达式
下面示例中的正则表达式只有在 URL 带有 http://、https:// 或 ftp:// 前缀时才能匹配，现在需要改成无论是否带有协议前缀都能检测出来。
我对正则不太熟，求指教～
#include "stdafx.h"

#include <cstdlib>
#include <iostream>
#include <locale>
#include <regex>
#include <stdexcept>
#include <string>
using namespace std;
/// Splits `url` around every URL-like substring and prints the pieces to
/// std::wcout, one per line: the text before each match, the match itself,
/// and finally the text left over after the last match (possibly empty).
///
/// A match consists of
///   - an optional scheme: "http://", "https://" or "ftp://",
///   - a dotted host name whose last label is 2-4 ASCII letters,
///   - an optional ":port",
///   - an optional "/path".
/// Because the scheme is optional, any dotted token of that shape (for
/// example "index.html") is reported as well; that is the price of detecting
/// URLs written without a scheme.
///
/// Side effect: std::wcout is imbued with the ctype facet of the user's
/// preferred locale.
///
/// @param url  Text to scan; it is not modified.
void show_url_parts(const std::wstring& url)
{
    // Compiled once and reused: building a std::wregex is far more expensive
    // than running it. A wide literal is used directly so the code does not
    // depend on the TCHAR/UNICODE build setting.
    static const std::wregex pattern(
        L"(?:(?:https?|ftp)://)?"                    // scheme, now optional
        L"[a-zA-Z0-9\\.\\-]+\\.([a-zA-Z]{2,4})"      // host + 2-4 letter label
        L"(:\\d+)?"                                  // optional port
        L"(/[a-zA-Z0-9\\.\\-~!@#$%^&*+?:_/=<>]*)?"); // optional path

    try
    {
        // std::locale::ctype is the category mask this constructor expects;
        // LC_CTYPE is the <clocale> constant for setlocale() and is not
        // guaranteed to have the same value.
        std::wcout.imbue(std::locale(std::locale(), "", std::locale::ctype));
    }
    catch (const std::runtime_error&)
    {
        // The environment names a locale that cannot be constructed; keep
        // whatever locale std::wcout already has rather than aborting.
    }

    // Walk the input with iterators instead of copying the unmatched tail
    // into a fresh string after every hit.
    std::wstring::const_iterator cursor = url.begin();
    const std::wstring::const_iterator last = url.end();
    std::wsmatch what;
    while (std::regex_search(cursor, last, what, pattern))
    {
        std::wcout << what.prefix() << L'\n';
        std::wcout << what[0] << L'\n';
        cursor = what[0].second; // resume right after the current match
    }
    // std::endl flushes, so everything is visible before the caller pauses.
    std::wcout << std::wstring(cursor, last) << std::endl;
}
/// Demo driver: runs show_url_parts on one sample string that mixes Chinese
/// text, a host written without a scheme, a bare ".html" fragment and two
/// https URLs, then waits for a key press.
int main()
{
    // A wide literal (L"...") is required to initialise std::wstring; the
    // _T() macro only produces one when the project is built with UNICODE.
    const std::wstring url1 = L"你好www.baidu.gov.cn第三方阿萨德.html的 https://www.b.com奥维德https://www.c.com收到";
    show_url_parts(url1);
    // Windows-only convenience: keeps the console window open until a key is
    // pressed so the output can be read.
    std::system("pause");
    return 0;
}