时隔多年,开盘啦APP龙虎榜席位标签爬虫再次上路。代码如下;本人非专业开发,水平比较业余,数据解析和存储环节还有很多不到位的地方,欢迎留言交流:
# -*- coding:utf-8 -*-
import pymysql
import datetime
import pandas as pd
import akshare as ak
import requests
import json
import pymysql
from sqlalchemy import create_engine
# (output column, key read from a SellList/BuyList entry), in output order.
_SEAT_COLUMNS = (
    ('营业部ID', 'ID'),
    ('营业部名称', 'Name'),
    ('营业部标签', 'YouZiIcon'),
    ('资金席位ID', 'GroupID'),
    ('资金席位名称', 'GroupIcon'),
)


def _seat_frame(entry):
    """Return the DataFrame for a single SellList/BuyList entry."""
    record = {column: entry.get(key) for column, key in _SEAT_COLUMNS}
    if any(isinstance(value, (list, tuple)) for value in record.values()):
        # A list-valued field is handed to pandas unchanged so that it is
        # expanded into rows.  NOTE(review): whether the API ever returns
        # lists for these fields is not visible here -- confirm.
        return pd.DataFrame(record)
    # All values are scalars: pd.DataFrame(record) would raise
    # "If using all scalar values, you must pass an index", so wrap the
    # record in a one-element list to get a one-row frame.
    return pd.DataFrame([record])


def spider_lhb_sales_department(date, code, engine=None, session=None):
    """Fetch the seat entries of one stock and append them to the database.

    Posts a ``GetNewOneStockInfo`` request, takes the first element of the
    ``List`` field of the JSON reply, and turns every entry of its
    ``SellList`` followed by every entry of its ``BuyList`` into rows with
    the columns named in ``_SEAT_COLUMNS``.  The resulting frame is printed
    and appended to the table ``ods_basic_department_info``.

    Args:
        date: Value sent as the ``Time`` form field.
        code: Value sent as the ``StockID`` form field.
        engine: Connection/engine passed to ``DataFrame.to_sql``.  When
            omitted, the module-level ``engine1`` is used, as before.
        session: Object exposing ``post(url=..., data=..., headers=...,
            timeout=...)``, e.g. a ``requests.Session``.  When omitted, the
            ``requests`` module itself is used.

    Returns:
        None.  The function is best-effort: any failure is reported on
        stdout instead of being raised, so a caller looping over many
        stocks keeps going.
    """
    # The original URL ended in " HTTP/1.1", a leftover from a pasted raw
    # request line that ended up inside the VerSion query value.
    url = 'https://lhb.kaipanla.com/w1/api/index.php?apiv=w28&PhoneOSNew=1&VerSion=5.2.0.1'
    data = {
        'c': 'Stock',
        'a': 'GetNewOneStockInfo',
        'Type': 0,
        'Time': date,
        'StockID': code,
        'DeviceID': 'ffffffff-f916-2186-0000-00000cdf9093',
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 7.1.2; VOG-AL00 Build/N2G48H; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.70 Mobile Safari/537.36;kaipanla 5.2.0.1',
    }
    try:
        http = requests if session is None else session
        # requests never times out by default; bound the wait so one stuck
        # connection cannot hang the whole crawl.
        response = http.post(url=url, data=data, headers=headers, timeout=10)
        # Decode as JSON.  Never eval() a remote response: it executes
        # whatever the server sends and fails on true/false/null.
        stock_list = response.json().get('List')
        if not stock_list:
            print(f'spider_lhb_sales_department: no List data for {code!r} on {date!r}')
            return
        detail = stock_list[0]

        # Parse: sell-side entries first, then buy-side, as before.
        entries = list(detail.get('SellList') or []) + list(detail.get('BuyList') or [])
        frames = [_seat_frame(entry) for entry in entries]
        if frames:
            # DataFrame.append was removed in pandas 2.0; concat replaces it.
            df = pd.concat(frames, ignore_index=True)
        else:
            df = pd.DataFrame(columns=[column for column, _ in _SEAT_COLUMNS])
        print(df)

        con = engine1 if engine is None else engine
        df.to_sql('ods_basic_department_info', con=con, if_exists='append', index=False)
    except Exception as error:
        # Still best-effort, but no longer silent: say what failed.
        print(f'spider_lhb_sales_department({date!r}, {code!r}) failed: {error!r}')
if __name__ == '__main__':
    # Today's date as YYYY-MM-DD.
    # NOTE(review): not referenced anywhere in the visible part of this file.
    today = datetime.datetime.now().strftime('%Y-%m-%d')

    # Database engines: engine1 is the write target read by
    # spider_lhb_sales_department, engine2 is the source of the stock list.
    # SECURITY NOTE(review): credentials are hard-coded in the URLs below;
    # consider loading them from the environment or a config file.
    engine1 = create_engine('mysql+pymysql://root:123456@localhost/stock_ods_db?charset=utf8')
    engine2 = create_engine('mysql+pymysql://root:123456@localhost/stock_dwd_db?charset=utf8')

    # Load the distinct (t_date, v_code) pairs to crawl.
    lhb_df = pd.read_sql('select distinct t_date,v_code from dwd_stock_special_lhb', con=engine2)

    # Walk the rows pairwise.  Nesting one loop over every date inside
    # another over every code requests the full cartesian product, i.e.
    # mostly (date, code) combinations the query never returned.
    for date, code in zip(lhb_df['t_date'].values, lhb_df['v_code'].values):
        print('开始')
        spider_lhb_sales_department(date, code)
        print('结束')