VB使用Windows api从剪贴板中获取HTML类型中文内容时有乱码产生
最近想写个程序来辅助完成日常工作。其中有个需求是,从网页上复制内容,包括表格、文字、图片等信息。所以,我就想到使用Windows api来监视剪贴板(程序代码1),并且来获取CF_HTML类型的数据,因为只有这样才能保留下表格相关信息等等。在编写过程中,我遇到了一个很难缠的问题,是关于UTF-8到Unicode编码问题。经查找已经有个解决方法(程序代码2)。但是经过测试,这个方法也不完善,在复制如(程序代码3)这样的奇数个汉字时会出现一个“?”(<P>中国?/P>)
因此,我特来向高手请教。有解决方法没,原因是什么?
程序代码1
Public Declare Function SetWindowLong Lib "user32" Alias "SetWindowLongA" _
(ByVal hwnd As Long, ByVal nIndex As Long, ByVal dwNewLong As Long) As Long
Public Declare Function CallWindowProc Lib "user32" Alias "CallWindowProcA" _
(ByVal lpPrevWndFunc As Long, ByVal hwnd As Long, ByVal Msg As Long, ByVal wParam As Long, ByVal lParam As Long) As Long
Public Declare Function SendMessage Lib "user32" Alias "SendMessageA" _
(ByVal hwnd As Long, ByVal wMsg As Long, ByVal wParam As Long, lParam As Long) As Long
Public Declare Function SetClipboardViewer Lib "user32" _
(ByVal hwnd As Long) As Long
Public Declare Function ChangeClipboardChain Lib "user32" _
(ByVal hwnd As Long, ByVal hWndNext As Long) As Long
Public Declare Function IsClipboardFormatAvailable Lib "user32" _
(ByVal wFormat As Long) As Long
Public Declare Function RegisterClipboardFormat Lib "user32" Alias "RegisterClipboardFormatA" _
(ByVal lpString As String) As Long
Public Declare Function OpenClipboard Lib "user32" (ByVal hwnd As Long) As Long
Public Declare Function CloseClipboard Lib "user32" () As Long
Public Declare Function GetClipboardData Lib "user32" (ByVal wFormat As Long) As Long
Public Declare Function GlobalLock Lib "kernel32" (ByVal hMem As Long) As Long
Public Declare Function GlobalUnlock Lib "kernel32" (ByVal hMem As Long) As Long
Public Declare Function lstrlen Lib "kernel32" Alias "lstrlenA" (ByVal lpData As Long) As Long
Private Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" _
(pDest As Any, pSource As Any, ByVal cbLength As Long)
Public Declare Function MultiByteToWideChar Lib "kernel32" _
(ByVal CodePage As Long, ByVal dwFlags As Long, ByRef lpMultiByteStr As Any, _
ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Public lpPrevWndProc As Long
Public lClipboardChain As Long
Public Const GWL_WNDPROC = -4
Public Const WM_DRAWCLIPBOARD = &H308
Public Const WM_CHANGECBCHAIN = &H30D
Public CF_HTML As Long
Public Const CP_UTF8 = 65001
Public Sub hook(hwnd As Long)
' 在这里使用了 AddressOf,得到的是WindowProc 函数的地址
'GWL_WNDPROC告诉SetWindowLong 函数将使用SubClass
lpPrevWndProc = SetWindowLong(hwnd, GWL_WNDPROC, AddressOf WindowProc)
'SubClass需要保存调用时返回的指针以便做其他处理
lClipboardChain = SetClipboardViewer(hwnd)
CF_HTML = RegisterClipboardFormat("HTML Format")
End Sub
Public Sub unHook(hwnd As Long)
SetWindowLong hwnd, GWL_WNDPROC, lpPrevWndProc
ChangeClipboardChain hwnd, lClipboardChain
End Sub
Public Function WindowProc(ByVal hwnd As Long, ByVal uMsg As Long, ByVal wParam As Long, ByVal lParam As Long) As Long
Select Case uMsg
Case WM_DRAWCLIPBOARD
'剪贴板改变时你要做什么处理就是写在这里了
If CBool(OpenClipboard(hwnd)) Then
Dim data As String
Dim clipSize As Long
Dim memHandle As Long
Dim dataHandle As Long
GlobalUnlock memHandle
memHandle = GetClipboardData(CF_HTML)
If CBool(memHandle) Then
dataHandle = GlobalLock(memHandle)
If dataHandle <> 0 Then
clipSize = lstrlen(dataHandle)
data = String(clipSize + 10, 0)
Call CopyMemory(ByVal data, ByVal dataHandle, clipSize)
Dim startFrag As Long
Dim endFrag As Long
Dim index As Long
Dim result As String
startFrag = InStr(data, "<!--StartFragment-->") + Len("<!--StartFragment-->")
endFrag = InStr(data, "<!--EndFragment-->")
If startFrag > 0 And endFrag > 0 Then
result = Mid(data, startFrag, endFrag - (startFrag))
End If
'不知道为什么使用官方的方法不可行。还存在乱码情况。
'index = InStr(data, "StartFragment:")
'If index Then
'startFrag = CLng(Mid(data, index + Len("StartFragment:"), 10))
'End If
'index = InStr(data, "EndFragment:")
'If index Then
'endFrag = CLng(Mid(data, index + Len("EndFragment:"), 10))
'End If
'If startFrag > 0 And endFrag > 0 Then
'result = Mid(data, startFrag, (endFrag - startFrag))
'End If
'Debug.Print "startFragment: " & startFrag; "- endFragment: " & endFrag
'result = Mid(data, 339, 43)
Debug.Print data
mainForm.Text1.Text = UTF8_Decode(result)
End If
End If
End If
Call CloseClipboard
SendMessage lClipboardChain, WM_DRAWCLIPBOARD, 0, 0
Case WM_CHANGECBCHAIN
If lClipboardChain = wParam Then
lClipboardChain = lParam
End If
Case Else
WindowProc = CallWindowProc(lpPrevWndProc, hwnd, uMsg, wParam, lParam)
End Select
End Function
'Purpose:Convert Utf8 to Unicode
Public Function UTF8_Decode(ByVal sUTF8 As String) As String
Dim lngUtf8Size As Long
Dim strBuffer As String
Dim lngBufferSize As Long
Dim lngResult As Long
Dim bytUtf8() As Byte
Dim n As Long
If LenB(sUTF8) = 0 Then Exit Function
On Error GoTo EndFunction
bytUtf8 = StrConv(sUTF8, vbFromUnicode)
lngUtf8Size = UBound(bytUtf8) + 1
On Error GoTo 0
lngBufferSize = lngUtf8Size * 2
strBuffer = String$(lngBufferSize, vbNullChar)
'Translate using code page 65001(UTF-8)
lngResult = MultiByteToWideChar(CP_UTF8, 0, bytUtf8(0), _
lngUtf8Size, StrPtr(strBuffer), lngBufferSize)
'Trim result to actual length
If lngResult Then
UTF8_Decode = Left$(strBuffer, lngResult)
End If
EndFunction:
End Function
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<title>test</title>
</head>
<body>
<P>中国语</P>
</body>
</html>