批量文件操作脚本
2012年9月30日
1 Python脚本
flow.py [options] operate filename1/2 --dir directory
options表示选项,目前支持三个选项:
--re,表示regular expression,被匹配的文件名字以正则表达式方式指定;
--cd,表示change directory,该操作会影响到目录;没有该选项时,所有的操作只针对普通文件。
--onlydir,表示被操作的对象只有目录,不操作普通文件。
operate字段,表示操作类型,目前支持三种操作类型:
--changename oldname newname:将与oldname匹配的文件或目录的名字修改为newname。
--delete filename:将与filename匹配的文件或者目录删除掉。
--extract filename:将与filename匹配的文件提取到--extractdir指定的目录下面,若该目录不存在,则新创建目录。提取的方式以copy方式实现,不影响原有的目录结构。提取的结果保持原来的层次结构,会新创建对应的目录树。
目录相关选项:
目录指定可以是绝对路径(如C:/MyFiles/)或相对路径(如./test/)。注意,如果输入的路径包含空格,需要使用引号,如"C:/test app/project"。
--dir directory:用于指定被操作文件所在的根目录,所有的操作都发生在该目录下。
--extractdir extdirectory:仅用于提取操作(extract)时候,指定存放提取文件的目的目录,若该目录不存在,将新建目录。
默认情况下,文件名按通配符(*和?)方式匹配:test.temp表示匹配名字叫test.temp的所有文件,而*.txt表示匹配扩展名为txt的所有文件。而当使用选项--re时,可以用正则表达式指定被操作的文件,如bk.*[0-9]+\.cpp,表示匹配以bk开头,以数字串结尾的所有cpp文件。
默认情况下,所有的操作只针对普通文件;若操作需要同时影响目录,则使用--cd命令行选项。若只想操作目录,则使用--onlydir选项。
1.1修改文件名
flow.py [options] --changename f1 f2 --dir directory
%D:表示当前日期(如20121013)
%T:表示当前时间(如165819)
%C:表示按照序号递增方式命名。
示例:
flow.py --changename tmp.txt tmp_bk.txt --dir F:/project
flow.py --changename *.conf %D.conf%C.bk --dir ./
flow.py --re --changename (.*)\.java \1.jbk --dir ./test

执行Python脚本:

更改后的文件夹的内容:

1.2删除文件
示例:
flow.py --delete test.txt --dir F:/testdir/
flow.py --delete *.cpp --dir F:/testcpp/
flow.py --re --cd --delete .*test.* --dir F:/testdir/
flow.py --onlydir --delete .svn --dir F:/project/test/

例如其中res下包含.svn目录:

执行Python脚本过程:

删除.svn后的目录(示例):

1.3提取文件
示例:
flow.py --extract *.java --dir F:/project/test --extractdir F:/project/testjavabackup


2 源码实现
# -*- coding:utf-8 -*-
#This program is used for routine operations.
#----------------------------------------
#Process a bunch of files or directories:
#1)change names.
#2)delete files or dir.
#3)extract certain files or dir.
#----------------------------------------
#Time:2012-8-27 21:39:36
#Author:Aspiration
from __future__ import print_function
import os
import re
import sys
import stat
import time
import errno
import shutil

usage = '''
Input cmdline format:
    "flow.py [--options] --changename filename newname --dir directory"
    "flow.py [--options] --delete filename --dir directory"
    "flow.py [--options] --extract filename --dir directory --extractdir extractdir"
options meaning:
--re:use regular expression to search files. It's optional.
--cd:operations will influence directory(changename/delete). It's optional.
--onlydir:Only operate on directories,not influence regular files. It's optional.
--changename:change file or directory names. Newname can use special syntax:
    %D:current date;%T:current time;%C:increase counter.
    Example:"--changename test*.txt test%D-%T.txt"
    this command line will use date and time to rename files.
--delete:delete files.
--extract:extract certain files in directory.
--dir: operate on which directory.
--extractdir:this option is for extract operation providing destination directory for extracted files.
Example:
1)flow.py --changename test*.txt test_%C.txt --dir ./testdir/
  meaning:rename all txt files with header(test) to test_1.txt test_2.txt ...
2)flow.py --onlydir --delete .svn --dir ./
  meaning:delete all svn related directory in current directory.
'''

# Read/write/execute for user, group and others (0777).
_RWX_ALL = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

# WindowsError only exists on Windows; referencing the bare name elsewhere
# would raise NameError inside the error handler of Ops.run().
try:
    _WindowsError = WindowsError
except NameError:
    _WindowsError = None


def remove_readonly(func, path, exc):
    '''Error callback for shutil.rmtree(): retry after making path writable.

    func is the function that failed, path is the argument it failed on and
    exc is the exc_info tuple of the failure. When a removal failed with
    EACCES, the entry and, if needed, its parent directory are chmod-ed to
    0777 and the removal is retried. Any other failure is re-raised.
    '''
    excvalue = exc[1]
    # os.unlink is included because it is a different function object from
    # os.remove and may be the one rmtree reports as failed.
    if func in (os.rmdir, os.remove, os.unlink) and excvalue.errno == errno.EACCES:
        # ensure parent directory is writeable too
        pardir = os.path.abspath(os.path.join(path, os.path.pardir))
        if not os.access(pardir, os.W_OK):
            os.chmod(pardir, _RWX_ALL)
        os.chmod(path, _RWX_ALL)
        func(path)
    else:
        # Relies on being called while the original exception is being handled.
        raise


class Ops(object):
    '''Batch rename / delete / extract of files and directories.

    The constructor parses sys.argv; run() then performs the requested
    operation recursively below the directory given with --dir.
    '''
    chopt = "--changename"
    delopt = "--delete"
    exopt = "--extract"

    def __init__(self):
        self.wildcard = True    #default supporting wildcard.
        self.cd = False         #change directory.
        self.cf = True          #change files.
        self.operation = ""
        self.count = 0          #record total operations numbers.
        self.argsanity = True   #indicate whether arguments are sane.
        self.dir = ""
        self.filename = ""
        self.newname = ""
        self.extractdir = "./extract/"
        #Parse detailed command line arguments.
        self.parseargs()
        if self.dir == "":
            print("Error! You should specify directory with --dir xxx")
            self.argsanity = False
        if self.operation == "":
            print("Error! You should specify operation.")
            self.argsanity = False

    def run(self):
        '''Compile the file name pattern and dispatch the chosen operation.

        Does nothing when the command line was not sane. Exits the process
        with status 1 on an invalid regular expression or on an OSError.
        '''
        if not self.argsanity:
            return
        #Here we process wildcard case.
        try:
            pattern = self.filename
            if self.wildcard:
                # Escape everything, then turn the escaped wildcards back
                # into their regex equivalents and anchor the whole name.
                pattern = re.escape(pattern)
                pattern = pattern.replace("\\*", ".*").replace("\\?", ".")
                pattern = "^" + pattern + "$"
            regex = re.compile(pattern)
        except re.error:
            print("Regular expression isn't correct.")
            sys.exit(1)
        except Exception:
            print("Error!")
            sys.exit(1)
        #Workhorse runs here.
        try:
            if self.operation == Ops.chopt:
                self.changename(regex, self.newname, self.dir)
            elif self.operation == Ops.delopt:
                self.deletefile(regex, self.dir)
            elif self.operation == Ops.exopt:
                self.extractfile(regex, self.dir, self.extractdir)
        except OSError as e:
            print("Error:Operating System call can't be OK!")
            if _WindowsError is not None and isinstance(e, _WindowsError):
                print("WindowsError:" + str(e))
            else:
                print(e)
            sys.exit(1)
        except Exception:
            print("Error!")
            raise

    def changename(self, oldregex, newname, directory):
        '''changename() is to change file or directory names.
        It support normal names or wildcard or regular expression.

        oldregex is the compiled pattern, newname the replacement template
        (may contain %C/%T/%D and regex group references) and directory the
        root to process recursively. Entries whose computed name equals the
        current name are left untouched.
        '''
        if not os.path.isdir(directory):
            print("Directory:" + str(directory) + " doesn't exist!")
            return
        # Suffix counter for name collisions, shared by all renames in this
        # directory.
        identicalcnt = 0
        for f in os.listdir(directory):
            path = os.path.join(directory, f)
            isdir = os.path.isdir(path)
            if isdir:
                # Handle the children first, while the path is still valid.
                self.changename(oldregex, newname, path)
            if not ((self.cf and os.path.isfile(path)) or (self.cd and isdir)):
                continue
            if not oldregex.match(f):
                continue
            # parsename() reads self.count for %C, so count first.
            self.count = self.count + 1
            tempname = oldregex.sub(self.parsename(newname), f)
            if tempname == f:
                # Nothing to rename; do not consume a counter value.
                self.count = self.count - 1
                continue
            newpath, tempname, identicalcnt = self.getnewfilepath(
                directory, tempname, identicalcnt)
            os.rename(path, newpath)
            print("change file {0} to {1}".format(path, tempname))

    def _removefile(self, path):
        '''Remove one file, forcing write permission on it if necessary.'''
        try:
            os.remove(path)
        except OSError:
            if os.access(path, os.W_OK):
                print("Error! remove:" + path)
                return
            # Read-only file: grant permissions and retry once. A second
            # failure propagates to the caller.
            os.chmod(path, _RWX_ALL)
            os.remove(path)
        print("delete {0} ".format(path))

    def deletefile(self, fileregex, directory):
        '''deletefile() is to delete file or directory.
        It support normal names or wildcard or regular expression.

        Matching files are removed when self.cf is set; matching directories
        are removed with their content when self.cd is set. Directories that
        are not removed are searched recursively.
        '''
        if not os.path.isdir(directory):
            print("Directory:" + str(directory) + " doesn't exist!")
            return
        for f in os.listdir(directory):
            path = os.path.join(directory, f)
            matched = fileregex.match(f) is not None
            if os.path.isfile(path):
                if self.cf and matched:
                    self.count = self.count + 1
                    self._removefile(path)
            elif os.path.isdir(path):
                if self.cd and matched:
                    self.count = self.count + 1
                    shutil.rmtree(path, ignore_errors=False,
                                  onerror=remove_readonly)
                    print("delete dir {0} ".format(path))
                else:
                    self.deletefile(fileregex, path)

    def extractfile(self, fileregex, directory, newdirectory, _destroot=None):
        '''extractfile() is to extract file or directory to specified dir.
        It support normal names or wildcard or regular expression.
        The extracted files will remain the orignal hierarchy.

        Matching entries below directory are copied to the same relative
        location below newdirectory, which is created on demand. _destroot is
        internal: it carries the top-level destination through the recursion
        so that a destination located inside the source tree is not walked.
        '''
        if not os.path.isdir(directory):
            print("Directory:" + str(directory) + " doesn't exist!")
            return
        if _destroot is None:
            _destroot = os.path.normcase(os.path.abspath(newdirectory))
        for f in os.listdir(directory):
            path = os.path.join(directory, f)
            newpath = os.path.join(newdirectory, f)
            matched = fileregex.match(f) is not None
            if os.path.isdir(path):
                if os.path.normcase(os.path.abspath(path)) == _destroot:
                    # Never extract the destination into itself.
                    continue
                if self.cd and matched:
                    self.count = self.count + 1
                    if os.path.exists(newpath):
                        shutil.rmtree(newpath)
                    shutil.copytree(path, newpath)
                    print("copy dir {0} to {1} ".format(f, newdirectory))
                else:
                    self.extractfile(fileregex, path, newpath, _destroot)
            elif os.path.isfile(path):
                if self.cf and matched:
                    self.count = self.count + 1
                    if not os.path.exists(newdirectory):
                        os.makedirs(newdirectory)
                    shutil.copy2(path, newdirectory)
                    print("copy {0} to {1} ".format(f, newdirectory))
            else:
                print("Error! We can only process files and directories!")

    #parse command line arguments.
    def parseargs(self):
        '''Fill the instance attributes from sys.argv.

        Marks the arguments as not sane (self.argsanity = False) when the
        command line is too short, contains an unknown option or uses
        --extractdir without a preceding --extract. Exits with status 1 when
        an option is missing its value.
        '''
        argv = sys.argv
        argnum = len(argv)
        #we need at least 5 args,
        #E.g."flow.py --delete test --dir ./test"
        if argnum < 5:
            print(usage)
            self.argsanity = False
            return
        i = 1
        try:
            while i < argnum:
                arg = argv[i]
                if arg[:2] != "--":
                    #if not options,we ignore them.
                    print("unknown commandline data:" + arg)
                elif arg == Ops.chopt:
                    self.operation = arg
                    self.filename = argv[i + 1]
                    self.newname = argv[i + 2]
                    i = i + 2
                elif arg == Ops.delopt or arg == Ops.exopt:
                    self.operation = arg
                    self.filename = argv[i + 1]
                    i = i + 1
                elif arg == "--dir":
                    self.dir = argv[i + 1]
                    i = i + 1
                elif arg == "--re":
                    self.wildcard = False
                elif arg == "--cd":
                    self.cd = True
                elif arg == "--onlydir":
                    self.cf = False
                    self.cd = True
                elif arg == "--extractdir":
                    if self.operation != Ops.exopt:
                        print("Error! only extract can have --extractdir option")
                        # Do not let run() act on a rejected command line.
                        self.argsanity = False
                        return
                    self.extractdir = argv[i + 1]
                    i = i + 1
                else:
                    print("Error! unknown options:" + arg)
                    self.argsanity = False
                    return
                i = i + 1
        except IndexError:
            # An option was the last token and its value is missing.
            print("Error! command line format isn't correct.")
            sys.exit(1)

    #Parse special symbol input from command line.
    def parsename(self, namepattern):
        '''Expand the first %C, %T and %D in namepattern.

        %C becomes self.count, %T the local time as HHMMSS and %D the local
        date as YYYYMMDD. Only the first occurrence of each is replaced.
        '''
        namepattern = str(namepattern)
        if "%C" in namepattern:
            # NOTE(review): the original author marked the use of self.count
            # here as an error without saying what should replace it.
            namepattern = namepattern.replace("%C", str(self.count), 1)
        if "%T" in namepattern:
            curtime = time.strftime("%H%M%S", time.localtime())
            namepattern = namepattern.replace("%T", curtime, 1)
        if "%D" in namepattern:
            curdate = time.strftime("%Y%m%d", time.localtime())
            namepattern = namepattern.replace("%D", curdate, 1)
        return namepattern

    #get available filepath for a file.
    #Mainly to check existed files with same name.
    def getnewfilepath(self, parentdir, filename, cnt):
        '''Return a path in parentdir that does not exist yet.

        When parentdir/filename is taken, cnt is incremented and inserted
        before the extension (or appended when there is none) until a free
        name is found. Returns (newfilepath, filename, cnt) with the name and
        counter value actually used.
        '''
        newfilepath = os.path.join(parentdir, filename)
        if os.path.exists(newfilepath):
            stem, ext = os.path.splitext(filename)
            while os.path.exists(newfilepath):
                cnt = cnt + 1
                # Always rebuild from the original stem so that suffixes do
                # not pile up (name1, name12, ...).
                filename = stem + str(cnt) + ext
                newfilepath = os.path.join(parentdir, filename)
        return (newfilepath, filename, cnt)


if __name__ == "__main__":
    routineops = Ops()
    print("\nRESULT")
    print("--------------------------")
    routineops.run()