阿里OCR识别(通用文字识别)

前言

通用文字识别

https://help.aliyun.com/zh/ocr/product-overview/common-character-recognition-1?spm=a2c4g.11186623.0.0.196c1613rFC7eC#topic-2084720

SDK文档

https://next.api.aliyun.com/api-tools/sdk/ocr-api?version=2021-07-07&language=csharp-tea&tab=primer-doc

在线测试-手写识别

https://next.api.aliyun.com/api/ocr-api/2021-07-07/RecognizeHandwriting?sdkStyle=dara

服务开通

https://ocr.console.aliyun.com/overview

注意

「OCR统一识别」和「通用文字识别」是两个不同的产品,开通服务的时候注意不要选错。

资源包可以购买共享资源包,它同时可用于「OCR统一识别」和「通用文字识别」。

https://common-buy.aliyun.com/?spm=5176.23043878_1479102120.0.0.182e1cdcUlZNGP&commodityCode=ocr_share_dp_cn

C# 调用

安装依赖

1
Install-Package AlibabaCloud.SDK.Ocr-api20210707 -Version 3.1.1

识别在线文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
using System;

using AlibabaCloud.SDK.Ocr_api20210707.Models;

using Config;

using Tea;

/// <summary>
/// Sample helper that sends an image URL to the Alibaba Cloud OCR
/// handwriting-recognition API and prints the outcome to the console.
/// </summary>
public class AliOcrUtils
{
    /// <summary>
    /// Builds an OCR client from the AccessKey pair stored in <c>ZConfig</c>.
    /// </summary>
    /// <returns>A client bound to the cn-hangzhou OCR endpoint.</returns>
    public static AlibabaCloud.SDK.Ocr_api20210707.Client CreateClient()
    {
        // Leaking the project source may leak the AccessKey and endanger every resource
        // of the account; this is sample code only. The more secure STS flow is
        // recommended, see https://help.aliyun.com/document_detail/378671.html.
        AlibabaCloud.OpenApiClient.Models.Config config =
            new AlibabaCloud.OpenApiClient.Models.Config
            {
                AccessKeyId = ZConfig.ALIBABA_CLOUD_ACCESS_KEY_ID,
                AccessKeySecret = ZConfig.ALIBABA_CLOUD_ACCESS_KEY_SECRET,
            };
        // Endpoint list: https://api.aliyun.com/product/ocr-api
        config.Endpoint = "ocr-api.cn-hangzhou.aliyuncs.com";
        return new AlibabaCloud.SDK.Ocr_api20210707.Client(config);
    }

    /// <summary>
    /// Recognizes the text of a fixed online sample image and writes either the
    /// returned data or a failure description to the console.
    /// </summary>
    public static void Test()
    {
        AlibabaCloud.SDK.Ocr_api20210707.Client client = CreateClient();
        RecognizeHandwritingRequest request = new RecognizeHandwritingRequest
        {
            Url =
                "https://img2.baidu.com/it/u=2401331858,865388201&fm=253&fmt=auto&app=138&f=JPEG?w=800&h=1093",
            NeedRotate = false,
        };
        try
        {
            RecognizeHandwritingResponse response = client.RecognizeHandwritingWithOptions(
                request,
                new AlibabaCloud.TeaUtil.Models.RuntimeOptions()
            );
            if (response == null || response.Body == null)
            {
                // Guard against a missing body instead of failing with a NullReferenceException.
                Console.WriteLine("OCR request returned no response body.");
                return;
            }

            // A successful call is detected by Code being null (not 200).
            if (response.Body.Code == null)
            {
                Console.WriteLine(response.Body.Data);
            }
            else
            {
                // Previously a non-null Code was dropped silently; report it so failures are visible.
                Console.WriteLine(
                    "OCR request failed: {0} {1}",
                    response.Body.Code,
                    response.Body.Message
                );
            }
        }
        catch (TeaException error)
        {
            Console.WriteLine(error.Message);
        }
        catch (Exception error)
        {
            Console.WriteLine(error.Message);
        }
    }
}

识别本地文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
using System;
using System.IO;

using AlibabaCloud.SDK.Ocr_api20210707.Models;

using Config;

using Newtonsoft.Json.Linq;

using Tea;

/// <summary>
/// Sample helper that uploads a local image to the Alibaba Cloud OCR
/// handwriting-recognition API and returns the recognized text.
/// </summary>
public class AliOcrUtils
{
    /// <summary>
    /// Builds an OCR client from the AccessKey pair stored in <c>ZConfig</c>.
    /// </summary>
    /// <returns>A client bound to the cn-hangzhou OCR endpoint.</returns>
    public static AlibabaCloud.SDK.Ocr_api20210707.Client CreateClient()
    {
        AlibabaCloud.OpenApiClient.Models.Config settings =
            new AlibabaCloud.OpenApiClient.Models.Config();
        settings.AccessKeyId = ZConfig.ALIBABA_CLOUD_ACCESS_KEY_ID;
        settings.AccessKeySecret = ZConfig.ALIBABA_CLOUD_ACCESS_KEY_SECRET;
        // Endpoint list: https://api.aliyun.com/product/ocr-api
        settings.Endpoint = "ocr-api.cn-hangzhou.aliyuncs.com";
        return new AlibabaCloud.SDK.Ocr_api20210707.Client(settings);
    }

    /// <summary>
    /// Sends the image at <paramref name="filePath"/> to the OCR service.
    /// </summary>
    /// <param name="filePath">Path of the local image file to recognize.</param>
    /// <returns>
    /// The <c>content</c> field of the returned JSON, or an empty string when the
    /// service reports a code, the field is absent, or the call throws.
    /// </returns>
    public static string RecognizeText(string filePath)
    {
        AlibabaCloud.SDK.Ocr_api20210707.Client ocrClient = CreateClient();
        string text = "";
        using (FileStream image = new FileStream(filePath, FileMode.Open, FileAccess.Read))
        {
            RecognizeHandwritingRequest request = new RecognizeHandwritingRequest();
            request.Body = image;
            request.NeedRotate = false;
            try
            {
                AlibabaCloud.TeaUtil.Models.RuntimeOptions options =
                    new AlibabaCloud.TeaUtil.Models.RuntimeOptions();
                RecognizeHandwritingResponse reply =
                    ocrClient.RecognizeHandwritingWithOptions(request, options);

                // Only a null Code is treated as success; anything else leaves the result empty.
                if (reply.Body.Code == null)
                {
                    JToken content = JObject.Parse(reply.Body.Data)["content"];
                    if (content != null)
                    {
                        text = content.ToString();
                    }
                }
            }
            catch (TeaException sdkFailure)
            {
                Console.WriteLine(sdkFailure.Message);
            }
            catch (Exception failure)
            {
                Console.WriteLine(failure.Message);
            }
        }
        return text;
    }
}

调用

1
2
3
// Recognize a local image and print the extracted text.
string filePath = "C:\\Users\\Administrator\\Pictures\\test.jpg";
Console.WriteLine(AliOcrUtils.RecognizeText(filePath));

入参和回参

https://next.api.aliyun.com/api/ocr-api/2021-07-07/RecognizeHandwriting?sdkStyle=dara

字段名称 字段详情
RequestId 请求唯一 ID示例值:43A29C77-405E-4CC0-BC55-EE694AD00655
Data 返回数据示例值:{"content":"炼句 提问方式 1.请赏析诗歌某一联(句) 2.赏析某一联(句)的妙处 3.请赏析诗歌某、角度抒胸意、借景抒情、托物","height":1277,"orgHeight":1277,"orgWidth":1080,"prism_version":"1.0.9","prism_wnum":26,"prism_wordsInfo":[{"angle":-87,"direction":0,"height":83,"pos":[{"x":177,"y":56},{"x":260,"y":60},{"x":259,"y":88},{"x":176,"y":84}],"prob":96,"width":28,"word":"炼句","x":203,"y":30}],"width":1080}
Code 状态码示例值:200
Message 详细信息示例值:message

跟文档不一样的是

返回成功的时候Code不是200,而是null。

Python

安装依赖

1
pip install alibabacloud_ocr_api20210707==3.1.2

识别在线图片

识别URL的图片

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# -*- coding: utf-8 -*-
# This file is auto-generated, don't edit it. Thanks.
import json
import os
import sys
from typing import List

from alibabacloud_ocr_api20210707 import models as ocr_api_20210707_models
from alibabacloud_ocr_api20210707.client import Client as ocr_api20210707Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient


class Sample:
    """Sample calls to the OCR ``RecognizeBasic`` operation for an image URL."""

    # Image recognized when no URL is supplied in ``args``.
    DEFAULT_IMAGE_URL = "https://www.psvmc.cn/zjtools/z/ocr_imgs/img118.png"

    def __init__(self):
        pass

    @staticmethod
    def create_client() -> ocr_api20210707Client:
        """
        Create an OCR client from the AccessKey pair held in environment variables.

        @return: Client
        @throws KeyError: if ALIBABA_CLOUD_ACCESS_KEY_ID or
            ALIBABA_CLOUD_ACCESS_KEY_SECRET is not set.
        """
        # Leaking the project source may leak the AccessKey and endanger every resource
        # of the account; this is sample code only. The more secure STS flow is
        # recommended, see https://help.aliyun.com/document_detail/378659.html.
        config = open_api_models.Config(
            access_key_id=os.environ["ALIBABA_CLOUD_ACCESS_KEY_ID"],
            access_key_secret=os.environ["ALIBABA_CLOUD_ACCESS_KEY_SECRET"],
        )
        # Endpoint list: https://api.aliyun.com/product/ocr-api
        config.endpoint = "ocr-api.cn-hangzhou.aliyuncs.com"
        return ocr_api20210707Client(config)

    @staticmethod
    def _report_error(error: Exception) -> None:
        """Print an exception without raising a secondary AttributeError."""
        message = getattr(error, "message", None)
        if message is None:
            # Plain Python exceptions (KeyError, json.JSONDecodeError, ...) have no
            # ``message``/``data`` attributes; print the exception itself.
            print(error)
            return
        # Error message
        print(message)
        # Diagnostic URL, when the exception carries one
        data = getattr(error, "data", None)
        if isinstance(data, dict):
            print(data.get("Recommend"))
        UtilClient.assert_as_string(message)

    @staticmethod
    def main(
        args: List[str],
    ) -> None:
        """
        Recognize the text of an online image and print its ``content`` field.

        @param args: optional command-line arguments; ``args[0]`` overrides the
            default image URL.
        """
        image_url = args[0] if args else Sample.DEFAULT_IMAGE_URL
        client = Sample.create_client()
        recognize_basic_request = ocr_api_20210707_models.RecognizeBasicRequest(
            url=image_url, need_rotate=False
        )
        try:
            result = client.recognize_basic_with_options(
                recognize_basic_request, util_models.RuntimeOptions()
            )
            if result.status_code == 200:
                json_obj = json.loads(result.body.data)
                print(json_obj["content"])
        except Exception as error:
            # Printing only is for demonstration; handle errors properly in real projects.
            Sample._report_error(error)

    @staticmethod
    async def main_async(
        args: List[str],
    ) -> None:
        """
        Asynchronous variant of ``main``; prints the raw response object.

        @param args: optional command-line arguments; ``args[0]`` overrides the
            default image URL.
        """
        image_url = args[0] if args else Sample.DEFAULT_IMAGE_URL
        client = Sample.create_client()
        # Use the request model that matches the RecognizeBasic call below
        # (the original built a RecognizeHandwritingRequest here).
        recognize_basic_request = ocr_api_20210707_models.RecognizeBasicRequest(
            url=image_url, need_rotate=False
        )
        try:
            result = await client.recognize_basic_with_options_async(
                recognize_basic_request, util_models.RuntimeOptions()
            )
            print(result)
        except Exception as error:
            # Printing only is for demonstration; handle errors properly in real projects.
            Sample._report_error(error)


if __name__ == "__main__":
    # Placeholder credentials: replace "xxx" with a real AccessKey pair before running.
    for credential_name in (
        "ALIBABA_CLOUD_ACCESS_KEY_ID",
        "ALIBABA_CLOUD_ACCESS_KEY_SECRET",
    ):
        os.environ[credential_name] = "xxx"
    Sample.main(sys.argv[1:])

识别本地文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# -*- coding: utf-8 -*-
# This file is auto-generated, don't edit it. Thanks.
import json
import os
import sys
from typing import List

from alibabacloud_ocr_api20210707 import models as ocr_api_20210707_models
from alibabacloud_ocr_api20210707.client import Client as ocr_api20210707Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient


class Sample:
    """Sample call to the OCR ``RecognizeBasic`` operation for a local image file."""

    # Image recognized when no path is supplied in ``args``.
    DEFAULT_IMAGE_PATH = r"D:\Project\myblog\source\zjtools\z\ocr_imgs\img118.png"

    def __init__(self):
        pass

    @staticmethod
    def create_client() -> ocr_api20210707Client:
        """
        Create an OCR client from the AccessKey pair held in environment variables.

        @return: Client
        @throws KeyError: if ALIBABA_CLOUD_ACCESS_KEY_ID or
            ALIBABA_CLOUD_ACCESS_KEY_SECRET is not set.
        """
        # Leaking the project source may leak the AccessKey and endanger every resource
        # of the account; this is sample code only. The more secure STS flow is
        # recommended, see https://help.aliyun.com/document_detail/378659.html.
        config = open_api_models.Config(
            access_key_id=os.environ["ALIBABA_CLOUD_ACCESS_KEY_ID"],
            access_key_secret=os.environ["ALIBABA_CLOUD_ACCESS_KEY_SECRET"],
        )
        # Endpoint list: https://api.aliyun.com/product/ocr-api
        config.endpoint = "ocr-api.cn-hangzhou.aliyuncs.com"
        return ocr_api20210707Client(config)

    @staticmethod
    def _report_error(error: Exception) -> None:
        """Print an exception without raising a secondary AttributeError."""
        message = getattr(error, "message", None)
        if message is None:
            # Plain Python exceptions (KeyError, json.JSONDecodeError, ...) have no
            # ``message``/``data`` attributes; print the exception itself.
            print(error)
            return
        # Error message
        print(message)
        # Diagnostic URL, when the exception carries one
        data = getattr(error, "data", None)
        if isinstance(data, dict):
            print(data.get("Recommend"))
        UtilClient.assert_as_string(message)

    @staticmethod
    def main(
        args: List[str],
    ) -> None:
        """
        Upload a local image and print the ``content`` field of the OCR result.

        @param args: optional command-line arguments; ``args[0]`` overrides the
            default image path.
        """
        image_path = args[0] if args else Sample.DEFAULT_IMAGE_PATH
        client = Sample.create_client()
        # The opened file is a binary stream, which is what the request body expects.
        with open(image_path, "rb") as image_file:
            # Use the request model that matches the RecognizeBasic call below
            # (the original built a RecognizeHandwritingRequest here).
            recognize_basic_request = ocr_api_20210707_models.RecognizeBasicRequest(
                body=image_file,
                need_rotate=False,
            )
            try:
                result = client.recognize_basic_with_options(
                    recognize_basic_request, util_models.RuntimeOptions()
                )
                if result.status_code == 200:
                    json_obj = json.loads(result.body.data)
                    print(json_obj["content"])
            except Exception as error:
                # Printing only is for demonstration; handle errors properly in real projects.
                Sample._report_error(error)


if __name__ == "__main__":
    # Placeholder credentials: replace "xxx" with a real AccessKey pair before running.
    for credential_name in (
        "ALIBABA_CLOUD_ACCESS_KEY_ID",
        "ALIBABA_CLOUD_ACCESS_KEY_SECRET",
    ):
        os.environ[credential_name] = "xxx"
    Sample.main(sys.argv[1:])

识别PIL的Image对象

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# -*- coding: utf-8 -*-
# This file is auto-generated, don't edit it. Thanks.
import io
import json
import os
import sys
from typing import List

from alibabacloud_ocr_api20210707 import models as ocr_api_20210707_models
from alibabacloud_ocr_api20210707.client import Client as ocr_api20210707Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient
from PIL import Image


class Sample:
    """Sample call to the OCR ``RecognizeBasic`` operation for a PIL ``Image``."""

    # Image recognized when no path is supplied in ``args``.
    DEFAULT_IMAGE_PATH = r"D:\Project\myblog\source\zjtools\z\ocr_imgs\img118.png"

    def __init__(self):
        pass

    @staticmethod
    def create_client() -> ocr_api20210707Client:
        """
        Create an OCR client from the AccessKey pair held in environment variables.

        @return: Client
        @throws KeyError: if ALIBABA_CLOUD_ACCESS_KEY_ID or
            ALIBABA_CLOUD_ACCESS_KEY_SECRET is not set.
        """
        config = open_api_models.Config(
            access_key_id=os.environ["ALIBABA_CLOUD_ACCESS_KEY_ID"],
            access_key_secret=os.environ["ALIBABA_CLOUD_ACCESS_KEY_SECRET"],
        )
        config.endpoint = "ocr-api.cn-hangzhou.aliyuncs.com"
        return ocr_api20210707Client(config)

    @staticmethod
    def _report_error(error: Exception) -> None:
        """Print an exception without raising a secondary AttributeError."""
        message = getattr(error, "message", None)
        if message is None:
            # Plain Python exceptions (KeyError, json.JSONDecodeError, ...) have no
            # ``message``/``data`` attributes; print the exception itself.
            print(error)
            return
        # Error message
        print(message)
        # Diagnostic URL, when the exception carries one
        data = getattr(error, "data", None)
        if isinstance(data, dict):
            print(data.get("Recommend"))
        UtilClient.assert_as_string(message)

    @staticmethod
    def main(
        args: List[str],
    ) -> None:
        """
        Load an image with PIL, re-encode it in memory and print the OCR ``content``.

        @param args: optional command-line arguments; ``args[0]`` overrides the
            default image path.
        """
        image_path = args[0] if args else Sample.DEFAULT_IMAGE_PATH
        # The context manager closes the source file handle, which the original leaked.
        with Image.open(image_path) as source_image:
            # Convert to greyscale (optional pre-processing step).
            image = source_image.convert("L")

        # Serialize the image into an in-memory binary stream.
        image_file = io.BytesIO()
        image.save(image_file, format="PNG")
        # Rewind so the SDK reads the stream from its first byte.
        image_file.seek(0)

        client = Sample.create_client()
        # Use the request model that matches the RecognizeBasic call below
        # (the original built a RecognizeHandwritingRequest here).
        recognize_basic_request = ocr_api_20210707_models.RecognizeBasicRequest(
            body=image_file,
            need_rotate=False,
        )
        try:
            result = client.recognize_basic_with_options(
                recognize_basic_request, util_models.RuntimeOptions()
            )
            if result.status_code == 200:
                json_obj = json.loads(result.body.data)
                print(json_obj["content"])
        except Exception as error:
            # Printing only is for demonstration; handle errors properly in real projects.
            Sample._report_error(error)


if __name__ == "__main__":
    # Placeholder credentials: replace "xxx" with a real AccessKey pair before running.
    for credential_name in (
        "ALIBABA_CLOUD_ACCESS_KEY_ID",
        "ALIBABA_CLOUD_ACCESS_KEY_SECRET",
    ):
        os.environ[credential_name] = "xxx"
    Sample.main(sys.argv[1:])

识别OpenCV的numpy.ndarray

OpenCV加载图片后得到的数据格式为numpy.ndarray,我们需要把它转换为接口所需的BinaryIO。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# -*- coding: utf-8 -*-
# This file is auto-generated, don't edit it. Thanks.
import io
import json
import os
import sys
from typing import List

import cv2
import numpy as np
from alibabacloud_ocr_api20210707 import models as ocr_api_20210707_models
from alibabacloud_ocr_api20210707.client import Client as ocr_api20210707Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient


class Sample:
    """Sample call to the OCR ``RecognizeBasic`` operation for an OpenCV image array."""

    # Image recognized when no path is supplied in ``args``.
    DEFAULT_IMAGE_PATH = r"D:\Project\myblog\source\zjtools\z\ocr_imgs\img118.png"

    def __init__(self):
        pass

    @staticmethod
    def read_img(filename, mode=cv2.IMREAD_COLOR):
        """
        Read an image file into a numpy array.

        @param filename: path of the image file.
        @param mode: OpenCV imread flag passed to ``cv2.imdecode``.
        @return: the decoded image, or None when the data cannot be decoded.
        """
        # Load the file as raw bytes with numpy first, then decode from memory.
        raw_data = np.fromfile(filename, dtype=np.uint8)
        return cv2.imdecode(raw_data, mode)

    @staticmethod
    def create_client() -> ocr_api20210707Client:
        """
        Create an OCR client from the AccessKey pair held in environment variables.

        @return: Client
        @throws KeyError: if ALIBABA_CLOUD_ACCESS_KEY_ID or
            ALIBABA_CLOUD_ACCESS_KEY_SECRET is not set.
        """
        config = open_api_models.Config(
            access_key_id=os.environ["ALIBABA_CLOUD_ACCESS_KEY_ID"],
            access_key_secret=os.environ["ALIBABA_CLOUD_ACCESS_KEY_SECRET"],
        )
        config.endpoint = "ocr-api.cn-hangzhou.aliyuncs.com"
        return ocr_api20210707Client(config)

    @staticmethod
    def _report_error(error: Exception) -> None:
        """Print an exception without raising a secondary AttributeError."""
        message = getattr(error, "message", None)
        if message is None:
            # Plain Python exceptions (KeyError, json.JSONDecodeError, ...) have no
            # ``message``/``data`` attributes; print the exception itself.
            print(error)
            return
        # Error message
        print(message)
        # Diagnostic URL, when the exception carries one
        data = getattr(error, "data", None)
        if isinstance(data, dict):
            print(data.get("Recommend"))
        UtilClient.assert_as_string(message)

    @staticmethod
    def main(
        args: List[str],
    ) -> None:
        """
        Load an image with OpenCV, encode it as JPEG in memory and print the OCR ``content``.

        @param args: optional command-line arguments; ``args[0]`` overrides the
            default image path.
        """
        image_path = args[0] if args else Sample.DEFAULT_IMAGE_PATH
        image = Sample.read_img(image_path)
        if image is None:
            # cv2.imdecode signals undecodable data with None; stop before encoding it.
            print(f"Unable to decode image: {image_path}")
            return

        # Encode the numpy.ndarray as a JPEG byte buffer ('.png' would work as well).
        success, buffer = cv2.imencode(".jpg", image)
        if not success:
            # Without this guard the code below would use an undefined stream.
            print(f"Unable to encode image as JPEG: {image_path}")
            return

        # Wrap the bytes in an in-memory binary stream; a new BytesIO starts at position 0.
        image_file = io.BytesIO(buffer.tobytes())

        client = Sample.create_client()
        # Use the request model that matches the RecognizeBasic call below
        # (the original built a RecognizeHandwritingRequest here).
        recognize_basic_request = ocr_api_20210707_models.RecognizeBasicRequest(
            body=image_file,
            need_rotate=False,
        )
        try:
            result = client.recognize_basic_with_options(
                recognize_basic_request, util_models.RuntimeOptions()
            )
            if result.status_code == 200:
                json_obj = json.loads(result.body.data)
                print(json_obj["content"])
        except Exception as error:
            # Printing only is for demonstration; handle errors properly in real projects.
            Sample._report_error(error)


if __name__ == "__main__":
    # Placeholder credentials: replace "xxx" with a real AccessKey pair before running.
    for credential_name in (
        "ALIBABA_CLOUD_ACCESS_KEY_ID",
        "ALIBABA_CLOUD_ACCESS_KEY_SECRET",
    ):
        os.environ[credential_name] = "xxx"
    Sample.main(sys.argv[1:])

工具类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# -*- coding: utf-8 -*-
# This file is auto-generated, don't edit it. Thanks.
import io
import json
import os

import cv2
import numpy as np
from alibabacloud_ocr_api20210707 import models as ocr_api_20210707_models
from alibabacloud_ocr_api20210707.client import Client as ocr_api20210707Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient


class AliOcrUtils:
    """Utility wrapper around the OCR ``RecognizeBasic`` operation for OpenCV images."""

    def __init__(self):
        pass

    @staticmethod
    def create_client() -> ocr_api20210707Client:
        """
        Create an OCR client from the AccessKey pair held in environment variables.

        @return: Client
        @throws KeyError: if ALIBABA_CLOUD_ACCESS_KEY_ID or
            ALIBABA_CLOUD_ACCESS_KEY_SECRET is not set.
        """
        config = open_api_models.Config(
            access_key_id=os.environ["ALIBABA_CLOUD_ACCESS_KEY_ID"],
            access_key_secret=os.environ["ALIBABA_CLOUD_ACCESS_KEY_SECRET"],
        )
        config.endpoint = "ocr-api.cn-hangzhou.aliyuncs.com"
        return ocr_api20210707Client(config)

    @staticmethod
    def _report_error(error: Exception) -> None:
        """Print an exception without raising a secondary AttributeError."""
        message = getattr(error, "message", None)
        if message is None:
            # Plain Python exceptions (KeyError, json.JSONDecodeError, ...) have no
            # ``message``/``data`` attributes; print the exception itself.
            print(error)
            return
        # Error message
        print(message)
        # Diagnostic URL, when the exception carries one
        data = getattr(error, "data", None)
        if isinstance(data, dict):
            print(data.get("Recommend"))
        UtilClient.assert_as_string(message)

    @staticmethod
    def ocr_recognize(
        image: np.ndarray,
    ) -> str:
        """
        Recognize the text contained in an OpenCV image.

        @param image: image as a numpy array, as produced by OpenCV.
        @return: the ``content`` field of the OCR result, or an empty string when
            the image is missing/empty, cannot be encoded, or the request fails.
        """
        if image is None or image.size == 0:
            # Nothing to encode; keep the "empty string on failure" contract.
            return ""

        # Encode the numpy.ndarray as a JPEG byte buffer ('.png' would work as well).
        success, buffer = cv2.imencode(".jpg", image)
        if not success:
            return ""

        # Wrap the bytes in an in-memory binary stream; a new BytesIO starts at position 0.
        image_file = io.BytesIO(buffer.tobytes())

        client = AliOcrUtils.create_client()
        # Use the request model that matches the RecognizeBasic call below
        # (the original built a RecognizeHandwritingRequest here).
        recognize_basic_request = ocr_api_20210707_models.RecognizeBasicRequest(
            body=image_file,
            need_rotate=False,
        )
        try:
            result = client.recognize_basic_with_options(
                recognize_basic_request, util_models.RuntimeOptions()
            )
            if result.status_code == 200:
                json_obj = json.loads(result.body.data)
                # A result without "content" yields "" instead of a KeyError.
                return json_obj.get("content", "")
        except Exception as error:
            # Printing only is for demonstration; handle errors properly in real projects.
            AliOcrUtils._report_error(error)
        return ""