python批量转换文件编码
via: http://www.g2w.me/2012/02/python-batch-convert-file-encodings/

今天在 eclipse 中导入了个之前的 swing 项目,结果跑起来后乱码,检查代码发现竟然一部分 java 文件是 utf-8 编码,另一部分却不是。用 chardet 检测其中一个文件的编码:

>>> import chardet
>>> chardet.detect(open(r'E:\Workspaces\java\GCHMCreator\main\g2w\app\gchm\gui\ContentElement.java').read())
{'confidence': 0.99, 'encoding': 'GB2312'}
detect 文件返回的是一个字典,其中 encoding
# (Article text, continued from the line above:) ... its value is the detected encoding.
# -*- coding: utf-8 -*-
"""Batch-convert every ``.java`` file under the current directory to UTF-8.

Each converted file gets a ``<name>.bak`` backup next to it;
``process_bak_files`` can later restore or delete those backups.
"""
import codecs
import os
import shutil
import re

# Matches the ".bak" suffix that convert_encoding() appends to a file name.
_BAK_SUFFIX_RE = re.compile(r'\.bak$', re.IGNORECASE)


def convert_encoding(filename, target_encoding):
    """Re-encode ``filename`` in place, keeping a ``filename + '.bak'`` backup.

    The source encoding is guessed with ``chardet``; its name and the file
    name are printed.

    Args:
        filename: Path of the file to convert.
        target_encoding: Name of the codec the file is rewritten in.

    Raises:
        ValueError: If chardet cannot determine the source encoding.
        UnicodeError: If the content cannot be decoded or re-encoded.
        LookupError: If either encoding name is unknown to Python.
    """
    # Imported here so the backup helpers stay usable without chardet installed.
    import chardet

    with open(filename, 'rb') as source:
        raw = source.read()

    source_encoding = chardet.detect(raw)['encoding']
    if source_encoding is None:
        # chardet reports None when it has no guess (e.g. for an empty file).
        raise ValueError('cannot detect the encoding of %s' % filename)
    print('%s %s' % (source_encoding, filename))

    # Transcode fully in memory first, so a codec error can never leave the
    # original file truncated.
    encoded = codecs.encode(raw.decode(source_encoding), target_encoding)

    # Back up the original file, then overwrite it.
    shutil.copyfile(filename, filename + '.bak')
    with open(filename, 'wb') as target:
        target.write(encoded)


def main():
    """Convert every ``.java`` file under the current directory to UTF-8.

    Files that fail to convert are reported on stdout and skipped.
    """
    for root, _dirs, files in os.walk(os.getcwd()):
        for f in files:
            if not f.lower().endswith('.java'):
                continue
            filename = os.path.join(root, f)
            try:
                convert_encoding(filename, 'utf-8')
            except Exception as error:
                # Best-effort batch job: report the failure and keep going.
                print('%s: %s' % (filename, error))


def process_bak_files(action='restore'):
    """Restore or delete the ``.java.bak`` backups under the current directory.

    Args:
        action: ``'restore'`` moves each backup back over the file it was
            made from; ``'clear'`` deletes the backup.

    Raises:
        ValueError: If ``action`` is neither ``'restore'`` nor ``'clear'``.
    """
    if action not in ('restore', 'clear'):
        raise ValueError('unknown action: %r' % (action,))

    for root, _dirs, files in os.walk(os.getcwd()):
        for f in files:
            if not f.lower().endswith('.java.bak'):
                continue
            source = os.path.join(root, f)
            # Strip only the ".bak" suffix so the original name, including
            # the case of its extension, is restored exactly.
            target = os.path.join(root, _BAK_SUFFIX_RE.sub('', f))
            try:
                if action == 'restore':
                    shutil.move(source, target)
                else:
                    os.remove(source)
            except Exception as error:
                # Best-effort batch job: report the failure and keep going.
                print('%s: %s' % (source, error))


if __name__ == '__main__':
    # To undo a run or drop the backups, call process_bak_files('restore')
    # or process_bak_files('clear') here instead of main().
    main()