700字范文 > python 操作word页眉表格_Python 如何对word文档（.docx）的页眉和页脚进行编辑？

python 操作word页眉表格_Python 如何对word文档（.docx）的页眉和页脚进行编辑？

时间：2018-12-29 02:21:03

刚好接了个任务，要批量修改一批 docx 文件的页眉。经过一天“面向 Stack Overflow 编程”，写出了以下代码。因为本人只是业余水平，代码能用就好，如有错误请指出。

PS1.对齐什么的懒得调了。

PS2. 不适用于旧版 .doc 格式（仅支持 .docx）。

#批量修改docx文件页眉

import os,zipfile,tempfile

import shutil as su

from bs4 import BeautifulSoup as bs

import codecs

#一个读取word/header1.xml的函数

#用于读取模板docx文件的页眉xml

def get_word_xml(docxpath, member='word/header1.xml'):
    """Return the raw bytes of one part stored inside a .docx package.

    A .docx file is a zip archive; this opens it read-only and returns the
    content of ``member`` without extracting anything to disk.

    Args:
        docxpath: Path to the .docx file to read.
        member: Archive-internal name of the part to read. Defaults to
            ``'word/header1.xml'``, the header part this script edits.

    Returns:
        The part's content as ``bytes``.

    Raises:
        KeyError: If ``member`` is not present in the archive.
    """
    # The context manager closes the archive even if the read fails.
    with zipfile.ZipFile(docxpath) as zf:
        return zf.read(member)

#解压缩docx，替换header1.xml，重新打包压缩的函数

def zipdocx(inputdocx, outputdocx, edited_xml, temppath=None):
    """Write a copy of ``inputdocx`` whose header parts contain ``edited_xml``.

    The archive is unpacked into a temporary directory, ``word/header1.xml``
    (and ``word/header2.xml`` when the document has one) is overwritten, and
    every original entry is packed again into ``outputdocx`` in its original
    order.

    Args:
        inputdocx: Path of the source .docx file.
        outputdocx: Path of the .docx file to create (overwritten if present).
        edited_xml: New header content. ``bytes`` are written as-is; any
            other object is converted with ``str()`` and encoded as UTF-8.
        temppath: Existing directory in which the scratch directory is
            created. ``None`` uses the system default temporary location.

    Raises:
        zipfile.BadZipFile: If ``inputdocx`` is not a valid zip archive.
        OSError: If the input cannot be read or the output cannot be written.
    """
    if isinstance(edited_xml, bytes):
        header_bytes = edited_xml
    else:
        header_bytes = str(edited_xml).encode('utf8')

    tmp_dir = tempfile.mkdtemp(dir=temppath)
    try:
        # Remember the entry names so the archive can be rebuilt with the
        # same layout; the source archive is closed as soon as it is unpacked.
        with zipfile.ZipFile(inputdocx) as zf:
            filenames = zf.namelist()
            zf.extractall(tmp_dir)

        # header2.xml exists when the document uses a different first-page
        # header; both parts receive the same content. Parts that were not
        # in the source archive are left alone, since only original entries
        # are repacked below.
        for part in ('word/header1.xml', 'word/header2.xml'):
            if part in filenames:
                with open(os.path.join(tmp_dir, part), 'wb') as f:
                    f.write(header_bytes)

        # Repack every original entry. Deflate keeps the output comparable
        # in size to a normally saved .docx instead of storing it raw.
        with zipfile.ZipFile(outputdocx, 'w', zipfile.ZIP_DEFLATED) as docx:
            for filename in filenames:
                docx.write(os.path.join(tmp_dir, filename), filename)
    finally:
        # Best-effort cleanup: the original author saw removal fail for
        # unknown reasons, so a leftover scratch directory must not abort
        # the batch. It may need to be deleted by hand in that case.
        su.rmtree(tmp_dir, ignore_errors=True)

# Input folder containing the .docx files to process.
inputpath = r'your dir\docxfiles'

# Output folder that receives the edited copies.
outputpath = r'your dir\output'

# Template .docx whose header is already formatted as desired; this script
# only replaces the header text.
headerSAMPLE = r'your dir\SAMPLE.docx'

# Text to place in the header.
something = r''


def _build_header(template_docx, text):
    """Return the template's header XML with its text replaced by ``text``.

    The header part of ``template_docx`` is parsed with BeautifulSoup's XML
    parser, so the XML namespaces need no special handling. ``text`` goes
    into the first ``w:t`` element and every other ``w:t`` element is
    emptied.
    """
    soup = bs(get_word_xml(template_docx), 'xml')
    for index, node in enumerate(soup.find_all('w:t')):
        node.string = text if index == 0 else ''
    return soup


def main():
    """Write an ``-edited`` copy of every .docx in ``inputpath``."""
    # The template header is the same for every file, so build it once.
    soup = _build_header(headerSAMPLE, something)

    os.makedirs(outputpath, exist_ok=True)
    for inputdocxfile in os.listdir(inputpath):
        if not inputdocxfile.endswith('.docx'):
            continue
        inputdocx = os.path.join(inputpath, inputdocxfile)
        # splitext removes only the extension; str.rstrip('.docx') would
        # also eat trailing '.', 'd', 'o', 'c', 'x' characters of the name.
        stem = os.path.splitext(inputdocxfile)[0]
        outputdocx = os.path.join(outputpath, stem + '-edited.docx')
        zipdocx(inputdocx, outputdocx, soup)


if __name__ == '__main__':
    main()

本内容不代表本网观点和政治立场，如有侵犯你的权益请联系我们处理。

网友评论

网友评论仅供其表达个人看法，并不表明网站立场。