博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Python 获取需要提交登录表单的网页内容(即需要密码的网站):以财务网站为例
阅读量:7024 次
发布时间:2019-06-28

本文共 5585 字,大约阅读时间需要 18 分钟。

import base64
import json
import os
import re
import time
from functools import reduce

import requests

# -------- Python packages used by this script (listed above) ---------------
# Log in through the CAS single-sign-on page, walking the redirect chain by
# hand so the cookies issued along the way end up in the session.
pwd = base64.b64decode("")  # Base64-decode the stored password
loginMeta = {"username": '', "pwd": pwd}  # user name and password

msession = requests.Session()

# Submit the login form without following redirects automatically: the
# intermediate responses carry headers that are read below.
ret = msession.post(
    "http://uis.shou.edu.cn/cas/login?isLoginService=11&service=http://ecampus.shou.edu.cn/c/portal/login",
    {
        "username": loginMeta['username'],
        "password": loginMeta['pwd'],
        "submit": "",
    },
    allow_redirects=False,
)
if 'Location' not in ret.headers:
    # Without a redirect there is nothing to follow; fail with a clear
    # message instead of a bare KeyError on the next line.
    raise RuntimeError(
        "login POST returned no redirect (status %s)" % ret.status_code)

ret = msession.get(ret.headers['Location'], allow_redirects=False)
# NOTE(review): the first ';'-separated segment of Set-Cookie is normally
# "name=value", so the Cookie header built below may end up reading
# "JSESSIONID=JSESSIONID=..." -- confirm against the real server response.
Jsession = ret.headers['Set-Cookie'].split(';')[0]
ret = msession.get(ret.headers['Location'], allow_redirects=False)
Jheaders = {
    'Cookie': 'COOKIE_SUPPORT=true; JSESSIONID=%s; GUEST_LANGUAGE_ID=zh_CN' % Jsession,
}

# The URLs below are blank in the published listing; fill them in before
# running (requests rejects an empty URL).
getASessionUrl = ''
ret = msession.get(getASessionUrl, headers=Jheaders, allow_redirects=False)
while 'Location' in ret.headers:
    ret = msession.get(ret.headers['Location'])

fwUrl = ""
ret = msession.get(fwUrl, headers=Jheaders, allow_redirects=False)
while 'Location' in ret.headers:
    ret = msession.get(ret.headers['Location'])

# Snapshot every cookie collected so far as a plain dict for later requests.
ACookies = requests.utils.dict_from_cookiejar(msession.cookies)
ret = msession.get('')
ret = msession.get('')
# ------------ End of login section: cookies are kept for later use ------------
# NOTE(review): the three HTML patterns below were lost from the published
# listing (everything that looked like a tag was stripped). They are
# reconstructed on the assumption that each order is one <tr> row whose fields
# are <td> cells -- confirm against a real OrderShow response before use.
_ROW_PATTERN = re.compile(r'<tr[^>]*>(.*?)</tr>')
_CELL_PATTERN = re.compile(r'<td[^>]*>(.*?)</td>')
_TAG_PATTERN = re.compile(r'<[^>]+>')

# Raw strings: "\?" in a normal string literal is an invalid escape sequence.
_PRINT_ORDER_PATTERN = re.compile(r"SFP_ClaimsSelf/OrderQuery/PrintOrder\?OrderNo=(\d+)")
_PAGE_COUNT_PATTERN = re.compile(r"pagecount: '(\d+)'")

_ORDER_SHOW_URL = 'http://cwc1.shou.edu.cn:82/SFP_ClaimsSelf/OrderQuery/OrderShow'
_ORDER_INDEX_URL = 'http://cwc1.shou.edu.cn:82/SFP_ClaimsSelf/OrderQuery/OrderIndex'
_PRINT_ORDER_URL = 'http://cwc1.shou.edu.cn:82/SFP_ClaimsSelf/OrderQuery/PrintOrder?OrderNo='
_USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
               '(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36')


def parseOrderInfo(content):
    """Extract the order records from one page of order-list HTML.

    Args:
        content: HTML text of one OrderShow response.

    Returns:
        A list of dicts with the keys ``orderId``, ``project``, ``reason``,
        ``pay`` and ``date``, one per matched row.

    Raises:
        Exception: if a matched row yields six or fewer non-trivial fields.
    """
    content = content.replace("\r", '').replace('\n', '').replace('\t', ' ')
    orders = []
    for ele in _ROW_PATTERN.findall(content):
        # Put every cell on its own line, then drop whatever markup is left.
        text = _CELL_PATTERN.sub("\n\\1", ele)
        text = _TAG_PATTERN.sub("", text)
        # Keep only fields with more than one visible character.
        p = [x for x in text.split('\n') if len(x.strip()) > 1]
        print(p)
        if len(p) > 6:
            # Fields 0 and 5 are not part of the record.
            orders.append({
                "orderId": p[1].strip(),
                "project": p[2].strip(),
                "reason": p[3].strip(),
                "pay": p[4].strip(),
                "date": p[6].strip(),
            })
        else:
            # Message kept as published; it fires when a row has too FEW fields.
            raise Exception("too LONG order Description")
    return orders


def _sessionCookieHeader():
    """Build the Cookie header value from the cookies captured at login."""
    return 'ASP.NET_SessionId=%s; SFP_Verify_Cookie=%s' % (
        ACookies["ASP.NET_SessionId"], ACookies["SFP_Verify_Cookie"])


def _fetchOrderPage(pageIndex):
    """Submit the order-query POST form for one page and return the response.

    Every form field is sent even when empty. The first page is requested
    with ``num`` set to '1' and every later page with '2', as in the
    original requests.
    """
    return msession.post(
        url=_ORDER_SHOW_URL,
        data={
            'DepartProject': '',
            'Depart': '',
            'depname': '',
            'Object': '',
            'projectname': '',
            'OrderStartTime': '',
            'OrderEndTime': '',
            'OrderNo': '',
            'OrderState': '1,2,3,4,5,8,-1',
            'ExpenBusinessType': '',
            'currentPageIndex': '%d' % pageIndex,
            'num': '1' if pageIndex == 1 else '2',
            'isture': 'false',
            'ProxyPerson': '',
            'OrderRemark': '',
        },
        headers={
            'Cookie': _sessionCookieHeader(),
            'Referer': _ORDER_INDEX_URL,
            'User-Agent': _USER_AGENT,
            'X-Requested-With': 'XMLHttpRequest',
        },
    )


# First page: yields the first batch of orders and the total page count.
ret = _fetchOrderPage(1)
time.sleep(2)
seaContent = ret.content.decode()
orderInfo = parseOrderInfo(seaContent)
orders = _PRINT_ORDER_PATTERN.findall(seaContent)

# Fall back to a single page when the page count is absent from the response.
pageMatch = _PAGE_COUNT_PATTERN.search(seaContent)
pages = int(pageMatch.group(1)) if pageMatch else 1

# Page 1 is already parsed above, so start at 2; fetching it again would
# record every first-page order twice.
for i in range(2, pages + 1):
    ret = _fetchOrderPage(i)
    seaContent = ret.content.decode()
    orders += _PRINT_ORDER_PATTERN.findall(seaContent)
    orderInfo += parseOrderInfo(seaContent)
    time.sleep(1)

# Download the printable PDF of every order found.
address = "E:/totally/FinancePDF" + "/"  # directory the PDF files are stored in
os.makedirs(address, exist_ok=True)
for orderId in orders:
    fileName = orderId + ".pdf"
    # Check before downloading so files already on disk cost no request.
    if os.path.isfile(address + fileName):
        print(fileName + '文件已存在')
        continue
    printUrl = _PRINT_ORDER_URL + orderId
    result = requests.get(
        url=printUrl,
        headers={
            'Cookie': _sessionCookieHeader(),
            'Referer': ret.url,
            'User-Agent': _USER_AGENT,
        },
    )
    with open(address + fileName, "wb") as f:
        f.write(result.content)
# Print the collected order numbers as indented JSON.
sumInfo = {"detail": orders}
print(json.dumps(sumInfo, indent=4))
# ------ End of the form-submission section (every form field must be sent,
# otherwise the crawl stops) ------

if __name__ == '__main__':
    # parseOrderInfo requires the page HTML; the published call passed no
    # argument and raised TypeError.
    # NOTE(review): passing the most recently fetched page is an assumption --
    # confirm what this entry point was meant to do.
    parseOrderInfo(seaContent)
 
 
 
 

转载于:https://www.cnblogs.com/setname/p/8417737.html

你可能感兴趣的文章
cf 323A A. Black-and-White Cube 立体构造 不知道为什么当k为奇数时构造不出来 挺有趣的题目吧...
查看>>
JAVA向文件中追加内容(转)
查看>>
写出一篇好博文需要用到的工具
查看>>
Squid普通代理&&透明代理&&反向代理学习
查看>>
Geeks Union-Find Algorithm Union By Rank and Path Compression 图环算法
查看>>
苹果ipa软件包破解笔记
查看>>
contains 和 ele.compareDocumentPosition确定html节点间的关系
查看>>
Swift2.0语言教程之类的属性
查看>>
poj_3436 网络最大流
查看>>
SPR EAD NET 6
查看>>
ecshop数据表说明
查看>>
拓扑排序实现
查看>>
CSS实现透明边框
查看>>
Linux中Firefox——Firebug插件安装及使用
查看>>
bitShark对Android版本的支持
查看>>
Elasticsearch【正则搜索】分析&实践
查看>>
[leetcode]Valid Sudoku
查看>>
redis 基本性能测试说明
查看>>
Eclipse中直接执行sql语句(图文说明)
查看>>
去掉文件末尾的换行符, 怎么做最方便?
查看>>