目录
条形码获取商品信息
验证码识别:
pip install ddddocr
条形码商品信息网站:
http://tiaoma.cnaidc.com/
条形码获取商品信息
import logging
import ddddocr
import requests
import json
import os
import time
import sys
# Browser-like User-Agent sent with every HTTP request made by this script.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/94.0.4606.81 Safari/537.36'
    ),
}
# Absolute directory derived from sys.argv[0]; downloaded files are stored
# relative to it.
path = os.path.abspath(os.path.dirname(sys.argv[0]))
def parse_json(s):
    """Extract and decode the JSON object embedded in *s*.

    Everything before the first ``{`` and after the last ``}`` is discarded,
    and the remaining slice is decoded with ``json.loads``.

    Args:
        s: Text that contains a JSON object, possibly surrounded by other
            characters.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the extracted slice is not valid JSON.
    """
    first_brace = s.find('{')
    last_brace = s.rfind('}')
    payload = s[first_brace:last_brace + 1]
    return json.loads(payload)
# Scrape "tiaoma.cnaidc.com" to look up product information for a barcode.
def requestT1(shop_id, max_retries=5):
    """Query tiaoma.cnaidc.com for the product registered under a barcode.

    A captcha image is downloaded and solved with ddddocr, then the barcode
    and the recognized captcha are posted to the search endpoint. When the
    site answers that the captcha was wrong, the whole exchange is repeated,
    at most ``max_retries`` additional times. On a successful lookup that
    includes a product image, the image is saved under ``<path>/<shop_id>/``.

    Args:
        shop_id: Barcode to look up, as a string.
        max_retries: Maximum number of extra attempts after a captcha
            rejection. Negative values are treated as 0.

    Returns:
        The decoded JSON response of the last search request.

    Raises:
        requests.exceptions.RequestException: If the captcha download or the
            search request fails or times out.
        json.JSONDecodeError: If the search response contains no valid JSON.
    """
    url = 'http://tiaoma.cnaidc.com'
    # Build the OCR engine once and reuse it for every captcha attempt.
    ocr = ddddocr.DdddOcr()
    resp_json = None
    # The context manager closes the session's connections when done.
    with requests.Session() as s:
        for _ in range(max(0, max_retries) + 1):
            # Fetch the captcha through the same session as the search POST
            # (presumably the server ties the captcha to the session cookie).
            img_data = s.get(url + '/index/verify.html?time=',
                             headers=headers, timeout=10).content
            # Keep a copy of the latest captcha on disk, as before.
            with open('verification_code.png', 'wb') as v:
                v.write(img_data)
            # Solve the captcha directly from the downloaded bytes.
            code = ocr.classification(img_data)
            logging.info('当前验证码为 %s', code)
            # Search request parameters
            data = {"code": shop_id, "verify": code}
            resp = s.post(url + '/index/search.html', headers=headers,
                          data=data, timeout=10)
            resp_json = parse_json(resp.text)
            logging.info(resp_json)
            # Only a rejected captcha is worth retrying.
            if resp_json.get('msg') != '验证码错误':
                break
        # On success, store the product image if the response provides one.
        if resp_json.get('msg') == '查询成功':
            code_img = (resp_json.get('json') or {}).get('code_img')
            if code_img:
                _save_product_image(s, url, code_img, shop_id)
    return resp_json


def _save_product_image(session, base_url, code_img, shop_id):
    """Download a product image and save it as ``<path>/<shop_id>/<timestamp>.png``.

    Args:
        session: The requests session used for the lookup.
        base_url: Site root, prepended when ``code_img`` is not an absolute URL.
        code_img: Image location taken from the search response.
        shop_id: Barcode; used as the name of the target directory.

    Returns:
        The path of the saved file, or None if the download failed.
    """
    # Only a real URL scheme prefix marks the value as absolute.
    if code_img.startswith(('http://', 'https://')):
        img_url = code_img
    else:
        img_url = base_url + code_img
    try:
        shop_img_data = session.get(img_url, headers=headers, timeout=10).content
    except requests.exceptions.RequestException:
        # Covers connection errors as well as read timeouts; the image is
        # optional, so a failed download must not abort the lookup.
        logging.info('访问图片URL出现错误!')
        return None
    # os.path.join keeps the layout correct on every platform.
    target_dir = os.path.join(path, shop_id)
    os.makedirs(target_dir, exist_ok=True)
    localtime = time.strftime("%Y%m%d%H%M%S", time.localtime())
    img_path = os.path.join(target_dir, localtime + '.png')
    # Save the image
    with open(img_path, 'wb') as v:
        v.write(shop_img_data)
    logging.info(img_path)
    return img_path
if __name__ == "__main__":
    # Demo: look up one sample barcode and print the main product fields.
    try:
        # '2010777000019' is another sample barcode that can be tried here.
        dict_info = requestT1('6901028001915')['json']
        print('-----------------------------------------------------------')
        for field in ('code_sn', 'code_name', 'code_company',
                      'code_address', 'code_price'):
            print(dict_info[field])
    except Exception:
        # Catch Exception rather than using a bare except so that Ctrl-C and
        # SystemExit still propagate; log the traceback so the cause of the
        # failure is not lost.
        logging.exception('商品查询失败')
        print('商品无法查询!')