Python操作Excel

常用的方式

常用的读写Excel的库:

  • pandas
  • openpyxl
  • xlrd/xlwt/xlutils

使用它们都能够达到读写Excel的目的,但它们的侧重点又略有不同。

具体如下:

  • pandas:数据处理最常用的分析库之一,可以读取各种各样格式的数据文件,一般输出dataframe格式,功能强大
  • openpyxl:主要针对xlsx格式的excel进行读取和编辑
  • xlrd库:从excel中读取数据,支持xls、xlsx
  • xlwt库:对excel进行修改操作,不支持对xlsx格式的修改
  • xlutils库:在xlwt和xlrd中,对一个已存在的文件进行修改
  • xlwings:对xlsx、xls、xlsm格式文件进行读写、格式修改等操作
  • xlsxwriter:用来生成excel表格,插入数据、插入图标等表格操作,不支持读取
  • Microsoft Excel API:需安装pywin32,直接与Excel进程通信,可以做任何在Excel里可以做的事情,但比较慢

对比

类型 xlrd/xlwt/xlutils openpyxl pandas
读取/写入/修改
xls ×
xlsx 高版本支持读 不支持写
大文件 ×
效率
功能 较弱 一般 强大
耗时 0.35s 0.47s 2.6s

推荐使用xlrd/xlwtpandas

xlrd/xlwt

安装依赖

利用xlrd和xlwt进行excel读写,这里只能是xls类型excel

1
2
pip install xlrd
pip install xlwt

读取Excel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import xlrd

book = xlrd.open_workbook('data.xlsx')

sheet1 = book.sheets()[0]

nrows = sheet1.nrows
print('表格总行数',nrows)

ncols = sheet1.ncols
print('表格总列数',ncols)

# 行的值
row3_values = sheet1.row_values(2)
print('第3行值',row3_values)

# 列的值
col3_values = sheet1.col_values(2)
print('第3列值',col3_values)

# 单元格的值
cell_1_3_1 = sheet1.cell(0,2).value
print('第1行第3列的单元格的值:',cell_1_3_1)
# 或者
cell_1_3_2 = sheet1.row_values(0)[2]
print('第1行第3列的单元格的值:',cell_1_3_2)
# 或者
cell_1_3_3 = sheet1.col_values(2)[0]
print('第1行第3列的单元格的值:',cell_1_3_3)

写入Excel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import xlwt
import datetime
# 创建一个workbook 设置编码
workbook = xlwt.Workbook(encoding='utf-8')
# 创建一个worksheet
worksheet = workbook.add_sheet('Worksheet')
# 写入excel参数对应 行, 列, 值
worksheet.write(0, 0, label='测试')
# 设置单元格宽度
worksheet.col(0).width = 3333

# 设置单元格高度
tall_style = xlwt.easyxf('font:height 520;')
worksheet.row(0).set_style(tall_style)

# 设置对齐方式
alignment = xlwt.Alignment() # Create Alignment
# May be: HORZ_GENERAL, HORZ_LEFT, HORZ_CENTER, HORZ_RIGHT, HORZ_FILLED, HORZ_JUSTIFIED, HORZ_CENTER_ACROSS_SEL, HORZ_DISTRIBUTED
alignment.horz = xlwt.Alignment.HORZ_CENTER
# May be: VERT_TOP, VERT_CENTER, VERT_BOTTOM, VERT_JUSTIFIED, VERT_DISTRIBUTED
alignment.vert = xlwt.Alignment.VERT_CENTER
style = xlwt.XFStyle() # Create Style
style.alignment = alignment # Add Alignment to Style
worksheet.write(2, 0, '居中', style)

# 写入带颜色背景的数据
pattern = xlwt.Pattern() # Create the Pattern
# May be: NO_PATTERN, SOLID_PATTERN, or 0x00 through 0x12
pattern.pattern = xlwt.Pattern.SOLID_PATTERN
pattern.pattern_fore_colour = 5 # May be: 8 through 63. 0 = Black, 1 = White, 2 = Red, 3 = Green, 4 = Blue, 5 = Yellow, 6 = Magenta, 7 = Cyan, 16 = Maroon, 17 = Dark Green, 18 = Dark Blue, 19 = Dark Yellow , almost brown), 20 = Dark Magenta, 21 = Teal, 22 = Light Gray, 23 = Dark Gray, the list goes on...
style = xlwt.XFStyle() # Create the Pattern
style.pattern = pattern # Add Pattern to Style
worksheet.write(0, 1, '颜色', style)

# 写入日期
style = xlwt.XFStyle()
# Other options: D-MMM-YY, D-MMM, MMM-YY, h:mm, h:mm:ss, h:mm, h:mm:ss, M/D/YY h:mm, mm:ss, [h]:mm:ss, mm:ss.0
style.num_format_str = 'M/D/YY'
worksheet.write(0, 2, datetime.datetime.now(), style)

# 写入公式
worksheet.write(0, 3, 5) # Outputs 5
worksheet.write(0, 4, 2) # Outputs 2
# Should output "10" (A1[5] * A2[2])
worksheet.write(1, 3, xlwt.Formula('D1*E1'))
# Should output "7" (A1[5] + A2[2])
worksheet.write(1, 4, xlwt.Formula('SUM(D1,E1)'))

# 写入超链接
worksheet.write(1, 0, xlwt.Formula('HYPERLINK("http://www.baidu.com";"百度一下")'))
# 保存
workbook.save('Excel_test.xls')

读取Excel保存JSON

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import xlrd
import json
import os

def excel_reader(filename, jsonname):
try:
# 打开刚才写入的test.xls文件
wb = xlrd.open_workbook(filename, on_demand=True)
userlist = []
# 遍历所有表单内容
for sh in wb.sheets():
classname = sh.name
# 列数
ncols = sh.ncols
for r in range(2, sh.nrows):
user = {}
user["level"] = sh.row_values(r)[1]
user["grade"] = sh.row_values(r)[2]
user["class"] = sh.row_values(r)[3]
user["name"] = sh.row_values(r)[4]
user["loginname"] = sh.row_values(r)[5]
user["loginpwd"] = "123456"
if (ncols >= 7):
user["sex"] = sh.row_values(r)[7]
userlist.append(user)
content = json.dumps(userlist, ensure_ascii=False)
writeFile(os.path.join(mpath, jsonname), content)
print(userlist)
except:
print("转换失败:" + filename)

def writeFile(filepath, content):
with open(filepath, 'w', encoding='utf-8') as f:
f.write(content)
f.close()

if __name__ == '__main__':
mpath = os.getcwd()
excel_reader(os.path.join(mpath, '导入表格', 'import.xls'), "export.json")

文件写入

1
2
3
with open(filepath, 'w', encoding='utf-8') as f:
f.write(content)
f.close()

第二个参数可选

  • w 没有创建 ,有则覆盖
  • a 没有创建,有则追加

JSON对象转字符串

1
content = json.dumps(userlist, ensure_ascii=False)

默认ensure_asciiTrue,中文会被编码

pandas

读取

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#-- coding: utf-8 --

import pandas as pd

file_path = r'./demo.xls'
df = pd.read_excel(file_path, sheet_name = "Sheet1") # sheet_name不指定时默认返回全表数据

# 打印表数据,如果数据太多,会略去中间部分
print(df)

# 打印头部数据,仅查看数据示例时常用
print(df.head())

# 打印列标题
print(df.columns)

# 打印行
print(df.index)

# 打印指定列
print(df["name"])

# 描述数据
print(df.describe())

写入

1
2
3
4
5
from pandas import DataFrame

data = { 'name': ['zs', 'ls', 'ww'], 'age': [11, 12, 13], 'gender': ['man', 'man', 'woman']}
df = DataFrame(data)
df.to_excel('new.xlsx')

修改

1
2
3
4
5
6
7
8
9
10
11
12
13
14
#-- coding: utf-8 --

import pandas as pd
from pandas import DataFrame

file_path = r'./demo.xls'
df = pd.read_excel(file_path)

df['gender'][df['gender'] == 'girl'] = 'female'
df['gender'][df['gender'] == 'boy'] = 'male'

print(df)

DataFrame(df).to_excel(file_path, sheet_name='Sheet1', index=False, header=True)

新增

1
2
3
4
5
6
7
8
# 新增一行
df.loc[6] = [5, 'Eric', 'male', 20, '2021-5-18']

# 新增一列
df['favorite'] = None

# 写入数据文件
DataFrame(df).to_excel(file_path, sheet_name='Sheet1', index=False, header=True)