回答

收藏

年报下载代码

提示词工程 提示词工程 133 人阅读 | 0 人回复 | 2025-09-28

  1. import json
  2. import os
  3. from time import sleep
  4. from urllib import parse

  5. import requests


  6. def get_adress(bank_name):   
  7.     url = "http://www.cninfo.com.cn/new/information/topSearch/detailOfQuery"
  8.     data = {
  9.         'keyWord': bank_name,
  10.         'maxSecNum': 10,
  11.         'maxListNum': 5,
  12.     }
  13.     hd = {
  14.         'Host': 'www.cninfo.com.cn',
  15.         'Origin': 'http://www.cninfo.com.cn',
  16.         'Pragma': 'no-cache',
  17.         'Accept-Encoding': 'gzip,deflate',
  18.         'Connection': 'keep-alive',
  19.         'Content-Length': '70',
  20.         'User-Agent': 'Mozilla/5.0(Windows NT 10.0;Win64;x64) AppleWebKit / 537.36(KHTML, likeGecko) Chrome / 75.0.3770.100Safari / 537.36',
  21.         'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
  22.         'Accept': 'application/json,text/plain,*/*',
  23.         'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
  24.     }
  25.     r = requests.post(url, headers=hd, data=data)
  26.     print(r.text)
  27.     r = r.content
  28.     m = str(r, encoding="utf-8")
  29.     pk = json.loads(m)
  30.     orgId = pk["keyBoardList"][0]["orgId"]   #获取参数
  31.     plate = pk["keyBoardList"][0]["plate"]
  32.     code = pk["keyBoardList"][0]["code"]
  33.     print(orgId,plate,code)
  34.     return orgId, plate, code


  35. def download_PDF(url, file_name):   #下载pdf
  36.     url = url
  37.     r = requests.get(url)
  38.     f = open(bank +"/"+ file_name + ".pdf", "wb")
  39.     f.write(r.content)


  40. def get_PDF(orgId, plate, code):
  41.     url = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
  42.     data = {
  43.         'stock': '{},{}'.format(code, orgId),
  44.         'tabName': 'fulltext',
  45.         'pageSize': 30,
  46.         'pageNum': 1,
  47.         'column': plate,
  48.         'category': 'category_ndbg_szsh;',
  49.         'plate': '',
  50.         'seDate': '',
  51.         'searchkey': '',
  52.         'secid': '',
  53.         'sortName': '',
  54.         'sortType': '',
  55.         'isHLtitle': 'true',
  56.     }

  57.     hd = {
  58.         'Host': 'www.cninfo.com.cn',
  59.         'Origin': 'http://www.cninfo.com.cn',
  60.         'Pragma': 'no-cache',
  61.         'Accept-Encoding': 'gzip,deflate',
  62.         'Connection': 'keep-alive',
  63.         # 'Content-Length': '216',
  64.         'User-Agent': 'User-Agent:Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
  65.         'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
  66.         'Accept': 'application/json,text/plain,*/*',
  67.         'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
  68.         'X-Requested-With': 'XMLHttpRequest',
  69.         # 'Cookie': cookies
  70.     }
  71.     data = parse.urlencode(data)
  72.     print(data)
  73.     r = requests.post(url, headers=hd, data=data)
  74.     print(r.text)
  75.     r = str(r.content, encoding="utf-8")
  76.     r = json.loads(r)
  77.     reports_list = r['announcements']
  78.     for report in reports_list:
  79.         if '摘要' in report['announcementTitle'] or "20" not in report['announcementTitle']:
  80.             continue
  81.         if 'H' in report['announcementTitle']:
  82.             continue
  83.         else:  # http://static.cninfo.com.cn/finalpage/2019-03-29/1205958883.PDF
  84.             pdf_url = "http://static.cninfo.com.cn/" + report['adjunctUrl']
  85.             file_name = report['announcementTitle']
  86.             print("正在下载:"+pdf_url,"存放在当前目录:/"+bank+"/"+file_name)
  87.             download_PDF(pdf_url, file_name)
  88.             sleep(2)


  89. if __name__ == '__main__':
  90.     # bank_list = [ '中信银行', '兴业银行', '平安银行','民生银行', '华夏银行','交通银行', '中国银行', '招商银行', '浦发银行','建设银行', ]
  91.     bank_list = ["新海宜科技集团股份有限公司","中信银行"]
  92.     for bank in bank_list:
  93.         os.mkdir(bank)
  94.         orgId, plate, code = get_adress(bank)
  95.         get_PDF(orgId, plate, code)
  96.         print("下一家~")
  97.     print("All done!")
复制代码



分享到:
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

143 积分
25 主题
热门推荐