from bs4 import BeautifulSoup
import re
import requests
import traceback

def GetHTMltext(url, code='utf-8'):
    """Fetch a page and return its decoded text, or an empty string on failure."""
    try:
        kv = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.0.0",
              "cookie": "qgqp_b_id=1fa9098b5972ea7e3036dbdc3ff568a6; st_si=45750356129814; st_asi=delete; td_cookie=802729162; HAList=ty-1-600588-%u7528%u53CB%u7F51%u7EDC; st_pvi=40869652126604; st_sp=2023-08-25%2015%3A45%3A59; st_inirUrl=https%3A%2F%2Fcn.bing.com%2F; st_sn=4; st_psi=20230825161637105-113200301321-4929148757"}
        r = requests.get(url, headers=kv, timeout=30)
        r.raise_for_status()
        r.encoding = code
        return r.text
    except Exception:
        # Return an empty string so callers can detect and skip failed pages.
        return ""

def getstockList(lst, stockURl):
    """Collect stock codes (e.g. 'sh600588', 'sz000001') from every <a> href on the list page."""
    html = GetHTMltext(stockURl, 'GB2312')
    soup = BeautifulSoup(html, 'html.parser')
    a = soup.find_all('a')
    for i in a:
        try:
            href = i.attrs['href']
            lst.append(re.findall(r'[s][hz]\d{6}', href)[0])
        except Exception:
            # No href attribute, or no stock code in it; skip this link.
            continue
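
# Illustration of what the pattern above extracts ('/sh600588.html' is an assumed
# sample href for demonstration, not taken from the real page):
#   re.findall(r'[s][hz]\d{6}', '/sh600588.html')  ->  ['sh600588']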

def getstockInfo(lst, stockURl, fpath):
    """For each stock code, fetch its detail page and append the parsed fields to fpath."""
    count = 0
    for stock in lst:
        url = stockURl + stock + ".html"
        html = GetHTMltext(url)
        try:
            if html == "":
                # Page could not be fetched; skip this stock.
                continue
            infoDict = {}
            soup = BeautifulSoup(html, 'html.parser')
            stockInfo = soup.find('div', attrs={'class': 'stock_top clearfix'})
            name = soup.find('h1').string.strip()
            infoDict.update({"股票名称": name.split()[0]})  # "股票名称" = stock name
            Keylist = stockInfo.find_all('dt')
            valuelist = stockInfo.find_all('dd')
            # Pair each <dt> label with its <dd> value.
            for i in range(len(Keylist)):
                key = Keylist[i].string
                val = valuelist[i].string
                infoDict[key] = val
            with open(fpath, 'a', encoding='utf-8') as f:
                f.write(str(infoDict) + '\n')
                count = count + 1
                print('\rProgress: {:.2f}%'.format(count * 100 / len(lst)), end='')
        except Exception:
            count = count + 1
            print('\rProgress: {:.2f}%'.format(count * 100 / len(lst)), end='')
            traceback.print_exc()
            continue
return "" def main2(): stock_list_url = 'https://quote.eastmoney.com/stocklist.html' stock_info_url = 'https://xueqiu.com/S/BIDU' output_file = 'D://BaiduGupiao.txt' slist = [] getstockList(slist,stock_list_url) getstockInfo(slist,stock_info_url,output_file)