python3+BeautifulSoup+tkinter 爬虫 获取学校成绩

这是一个带界面的小爬虫,用来爬取学校成绩。

从最开始只用python3+正则表达式

然后加界面用tkinter

到最后加了BeautifulSoup

现在看起来和谐多了

<pre name="code" class="python">#获取学校成绩

import re,string,urllib.parse,urllib.request
from tkinter import *
from tkinter import ttk
from bs4 import BeautifulSoup
class AhutScore:
    """Small scraper with a tkinter front end for the school score page.

    getAhutScore() posts the query form and parses the returned HTML;
    getGUI() builds the input window and shows each query result in a
    separate window.
    """

    # Address the query form is posted to.
    SCORE_URL = 'http://211.70.149.134:8080/stud_score/brow_stud_score.aspx'

    # Browser-like User-Agent so the request looks like a normal visit.
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
    }

    def __init__(self):
        print('已经启动安工大成绩查询爬虫,咔嚓咔嚓')

    @staticmethod
    def _buildPostData(stuNo, idCard, xn, xq):
        """Return the url-encoded, UTF-8 encoded form body for one query.

        The hidden-field values below were captured from a real request to
        the site (the original note says with HttpWatch).
        NOTE(review): they are presumably tied to the page version that was
        captured and may need refreshing if the site changes - confirm.
        """
        return urllib.parse.urlencode({
            '__EVENTVALIDATION': '/wEWIQLH/uyCBwLs0bLrBgLs0fbZDALWrMSACwKEx5fABgKFx/uABQKax7/ABwKax6OABgKbx6OABgKYx+dBAsKF4K8GAs2FiJQIAsqF5O0IAsOF8PcLAsCFjO0JAvGV4pUFAv/6yPsJAv76yPsJAvbLmuYBAq7k2jACzqvD4A4CrvycrAcCi+uC+wwCn/nbgQ0C4d349AoC9PbF/AwCrZj0xQsCrZiIoQQC0sqYtwoC6MqwtAcC1srwtQoChobTsw4C1orq2A/lc4cMuGz9/vf0WzeaMjk2B63pi/yD0c3bh6AkZ2usTA==',
            '__VIEWSTATE': '/wEPDwUKLTc5MTY3NzY2OA9kFgICAw9kFg4CBQ8QZBAVDA09Peivt+mAieaLqT09CTIwMTQtMjAxNQkyMDEzLTIwMTQJMjAxMi0yMDEzCTIwMTEtMjAxMwkyMDExLTIwMTIJMjAxMC0yMDExCTIwMDktMjAxMAkyMDA4LTIwMDkJMjAwNy0yMDA4CTIwMDYtMjAwNwkyMDA1LTIwMDYVDAAJMjAxNC0yMDE1CTIwMTMtMjAxNAkyMDEyLTIwMTMJMjAxMS0yMDEzCTIwMTEtMjAxMgkyMDEwLTIwMTEJMjAwOS0yMDEwCTIwMDgtMjAwOQkyMDA3LTIwMDgJMjAwNi0yMDA3CTIwMDUtMjAwNhQrAwxnZ2dnZ2dnZ2dnZ2dkZAIHDxBkEBUDDT096K+36YCJ5oupPT0BMgExFQMAATIBMRQrAwNnZ2dkZAIdD2QWAgIFDzwrABEAZAIfD2QWAgIBDzwrABEAZAIjD2QWAgIJDzwrABEAZAIlD2QWAgIDDxBkZBYBZmQCJw9kFgICAQ88KwARAQEQFgAWABYAZBgEBQlHcmlkVmlldzMPZ2QFCUdyaWRWaWV3MQ9nZAUMR3JpZFZpZXdfY2owD2dkBQtHcmlkVmlld19jag9nZJ3osNiaHFKtpB351twVA++gU7GdyOdYypVlNUYHNaNo',
            '__VIEWSTATEGENERATOR': 'DCA2160B',
            'Button_cjcx': '查询',
            'drop_type': '全部成绩',
            'drop_xn': xn,
            'drop_xq': xq,
            'hid_dqszj': '',
            'TextBox1': stuNo,
            'TextBox2': idCard,
        }).encode(encoding='utf-8')

    @staticmethod
    def _text(tag):
        """Return the text inside a parsed tag, or '' when the tag is missing.

        get_text() is used instead of .string because .string is None for
        empty tags and for tags with several children, which would break
        the string operations done by the callers.
        """
        if tag is None:
            return ''
        return tag.get_text()

    def getAhutScore(self, stuNo, idCard, xn, xq):
        """Query the score page and return the parsed result.

        Args:
            stuNo: value sent as form field TextBox1 (student number).
            idCard: value sent as form field TextBox2 (ID card number).
            xn: value sent as drop_xn (school year, e.g. '2014-2015').
            xq: value sent as drop_xq (term, e.g. '1').

        Returns:
            A tuple (items, stuMsg, labelShow):
            items is a list with one dict per score row, mapping each
            header cell text to the matching cell text (non-breaking
            spaces removed); it is empty when the page reports no records
            or has no header row.
            stuMsg is the text of the span with id Label1.
            labelShow is the text of the span with id Label_SHOW.

        Raises:
            urllib.error.URLError: if the request fails.
            UnicodeDecodeError: if the response is not valid UTF-8.
        """
        req = urllib.request.Request(
            url=self.SCORE_URL,
            data=self._buildPostData(stuNo, idCard, xn, xq),
            headers=self.HEADERS,
        )
        # The with block closes the HTTP response even if reading fails.
        with urllib.request.urlopen(req) as result:
            unicodePage = result.read().decode('utf-8')

        # Name the parser explicitly so the parse tree does not depend on
        # which optional parser libraries happen to be installed.
        soup = BeautifulSoup(unicodePage, 'html.parser')
        stuMsg = self._text(soup.find('span', id='Label1'))
        labelShow = self._text(soup.find('span', id='Label_SHOW'))

        items = []
        # The header row of the score table carries class "Freezing".
        titleRow = soup.find('tr', class_='Freezing')
        if labelShow == '没有返回记录!' or titleRow is None:
            return items, stuMsg, labelShow

        titles = [self._text(th) for th in titleRow('th')]
        # Score rows are the left-aligned tr elements carrying this
        # onmouseout handler.
        scoreRows = soup('tr', align='left', onmouseout="this.style.backgroundColor=c")
        for row in scoreRows:
            cells = [self._text(td).replace('\xa0', '') for td in row('td')]
            # zip pairs as many columns as both rows provide, so a short
            # row cannot raise IndexError.
            items.append(dict(zip(titles, cells)))
        print(items)
        return items, stuMsg, labelShow

    def getGUI(self):
        """Build the query window and run the Tk main loop.

        The window has entries for student number and ID card number,
        read-only combo boxes for school year and term, and a query button.
        Each click runs getAhutScore() and shows the result in a new
        window. This call blocks until the main window is closed.
        """
        def showResult():
            items, stuMsg, labelShow = self.getAhutScore(
                stuNo.get(), idCard.get(), xn.get(), xq.get())
            # A Toplevel keeps a single Tk root for the whole application.
            rstk = Toplevel(root)
            rstk.title('成绩查询结果')
            rstk.resizable(width=False, height=False)
            t = Text(rstk)
            # Appending at END keeps the rows in the order of the page.
            t.insert(END, stuMsg + '\n')
            t.insert(END, '-' * 32 + labelShow + '-' * 32 + '\n')
            if items:
                for item in items:
                    t.insert(END, item['课程名'] + ':' + item['总评成绩'] + '\n')
            else:
                t.insert(END, '暂无信息!\n')
            t.grid(row=5, column=0, columnspan=4)

        root = Tk()
        root.resizable(width=False, height=False)
        root.title('ahut成绩查询')
        Label(root, text='学号:').grid(row=0, column=0, sticky=W)
        Label(root, text='身份证号:').grid(row=0, column=2, sticky=W)
        Label(root, text='学年:').grid(row=1, column=0, sticky=W)
        Label(root, text='学期:').grid(row=1, column=2, sticky=W)
        stuNo = StringVar()
        idCard = StringVar()
        xn = StringVar()
        xq = StringVar()
        Entry(root, textvariable=stuNo).grid(row=0, column=1)
        Entry(root, textvariable=idCard).grid(row=0, column=3)
        # Masked placeholder values; the user replaces them before querying.
        stuNo.set('11908***')
        idCard.set('34082*************')
        xnBox = ttk.Combobox(root, textvariable=xn, state='readonly')
        xnBox['values'] = ('', '2010-2011', '2011-2012', '2012-2013', '2013-2014', '2014-2015')
        xnBox.set('2014-2015')
        xnBox.grid(row=1, column=1)
        xqBox = ttk.Combobox(root, textvariable=xq, state='readonly')
        xqBox['values'] = ('', '1', '2')
        xqBox.set('1')
        xqBox.grid(row=1, column=3)
        Button(root, text='查询', command=showResult).grid(row=4, columnspan=4)
        root.mainloop()

# Script entry: create the scraper object (its constructor prints a start-up
# message) and call getGUI() on it to open the query window.
ahutScore = AhutScore()
ahutScore.getGUI()




python3+BeautifulSoup+tkinter 爬虫 获取学校成绩
https://www.920929.xyz/posts/55a796ac.html
作者
DELIN
发布于
2015年3月10日
许可协议