"""Douban movie scraper with a Tkinter GUI.

Assignment: visit the Douban movie site and scrape 4-10 upcoming movies
(title, director, cast, poster URL, trailer video URL); present the
information in a GUI; for the selected title, download the trailer video
to local disk and play it. GUI layout is up to the implementer.

Date: 2021-02-20  Author: admin
"""

import cv2 
from PIL import Image,ImageTk
import tkinter as tk 
import os
from bs4 import BeautifulSoup
import requests
import re
def select1():
    """Button handler for '应承': scrape its info and play the trailer."""
    # Clear the previous output FIRST, then show the progress hint.
    # The original inserted '请稍等' and then deleted 1.0..END, which
    # wiped the hint before anyone could see it.
    txt.delete(1.0, tk.END)
    txt.insert(tk.END, '请稍等')
    operate(a1_list, '应承')
    movie_1(movie_path)
def select2():
    """Button handler for '一秒钟': scrape its info and play the trailer."""
    txt.delete(1.0, tk.END)
    operate(a1_list, '一秒钟')
    movie_1(movie_path)
def select3():
    """Button handler for '疯狂原始人2': scrape its info and play the trailer."""
    txt.delete(1.0, tk.END)
    operate(a1_list, '疯狂原始人2')
    movie_1(movie_path)
def select4():
    """Button handler for '日光之下': scrape its info and play the trailer."""
    txt.delete(1.0, tk.END)
    operate(a1_list, '日光之下')
    movie_1(movie_path)
def select5():
    """Button handler for '隐形人': scrape its info and play the trailer.

    NOTE: some listed movies carry no trailer link, so the buttons skip
    over those titles rather than covering every scraped entry.
    """
    txt.delete(1.0, tk.END)
    operate(a1_list, '隐形人')
    movie_1(movie_path)
def messge():
    """Log a playback-success message to the console."""
    print("看片成功")
def movie_1(path):
    """Play the video at *path* frame-by-frame inside the Tk label.

    Blocks until the video ends (or the capture fails to deliver a
    frame), redrawing ``Label_movie`` for every frame.

    :param path: filesystem path of the video file to play.
    """
    video = cv2.VideoCapture(path)
    try:
        while video.isOpened():
            ret, frame = video.read()
            if not ret:
                break
            # OpenCV yields BGR; convert for PIL, then scale to the display area.
            rgba = cv2.cvtColor(frame, cv2.COLOR_BGR2RGBA)
            current_image = Image.fromarray(rgba).resize((900, 500))
            imgtk = ImageTk.PhotoImage(image=current_image)
            Label_movie.imgtk = imgtk  # keep a reference so Tk doesn't drop the image
            Label_movie.config(image=imgtk)
            Label_movie.update()
    finally:
        # Fix: the original never released the capture handle (resource leak).
        video.release()
def restore_poster(src_url):           # download the poster to a fixed directory
    """Download the poster at *src_url* into ./douban_poster/ and report
    the outcome both on the console and in the GUI text box.

    The file name is the last path segment of the URL; the module-level
    ``headers`` dict is used for the request.
    """
    if not os.path.exists('./douban_poster'):
        os.mkdir('./douban_poster')
    poster_name = src_url.split('/')[-1]
    poster_path = './douban_poster/' + poster_name
    try:
        # The try-block now covers the network call too — the original only
        # guarded fp.write, so a failed download escaped the handler.
        poster_data = requests.get(url=src_url, headers=headers).content
        with open(poster_path, 'wb') as fp:
            fp.write(poster_data)
    except (requests.RequestException, OSError):
        print('海报下载失败')
        # Fix: the original reported '海报下载成功' (success) on failure.
        txt.insert(tk.END, '海报下载失败')
    else:
        print('海报下载成功')
        txt.insert(tk.END, '海报下载成功')
                
def find_movie(movie_id):
    """Fetch the trailer page *movie_id*, locate the first .mp4 URL on it,
    download that file into ./douban_movie/ and return the mp4 URL.

    Sets the global ``movie_path`` to the saved file's path.

    :raises TypeError: when the page contains no .mp4 link, so the caller's
        existing ``except TypeError`` treats it like a missing trailer.
    """
    global movie_path
    movie_text = requests.get(url=movie_id, headers=headers).text
    # A regex is used instead of BeautifulSoup because the page's video
    # markup varies between movies, while a plain http://...mp4 URL is
    # always embedded somewhere in the raw HTML (re.S spans newlines).
    matches = re.findall(r'(http://.*?\.mp4)', movie_text, re.S)
    if not matches:
        # Fix: the original crashed with an uncaught IndexError here.
        raise TypeError('no .mp4 trailer link found on page')
    movie_source = matches[0].split('\"')[-1]
    if not os.path.exists('./douban_movie'):
        os.mkdir('./douban_movie')
    movie_data = requests.get(url=movie_source, headers=headers).content
    movie_name = movie_source.split('/')[-1]
    movie_path = './douban_movie/' + movie_name
    with open(movie_path, 'wb') as fp:            # save the trailer
        fp.write(movie_data)
    print('预告片下载成功')
    txt.insert(tk.END, '预告片下载成功')
    return movie_source
def operate(a1_list, textm):
    """Find the listing card titled *textm* in *a1_list*, scrape its
    details, show them in the GUI text box and append them to ./douban.txt.

    For the matched card this downloads the poster, attempts to download
    the trailer, and fetches the detail page for director/cast info.

    :param a1_list: bs4 ResultSet of the movie cards on the listing page.
    :param textm:   exact movie title to look for.
    """
    # `with` replaces the bare open/close pair: the original leaked the
    # file handle whenever an exception escaped the loop.
    with open('./douban.txt', 'w', encoding='utf_8') as fp:
        for card in a1_list:
            title = card.div.h3.a.string        # movie title
            if title != textm:
                continue
            src_url = card.a.img['src']         # poster URL
            print(title, '爬取成功')
            txt.insert(tk.END, title + '爬取成功')
            restore_poster(src_url)
            detail_url = card.a['href']         # detail-page URL
            try:
                movie_id = card.div.ul.a['href']       # trailer page URL
                movie_source = find_movie(movie_id)    # trailer mp4 URL
            except TypeError:
                # Cards without a trailer anchor yield None.a -> TypeError.
                movie_source = '暂无预告片链接'
                print(movie_source)
            # Fetch and parse the detail page for director/cast text.
            detail_text = requests.get(url=detail_url, headers=headers).text
            detail_soup = BeautifulSoup(detail_text, 'lxml')
            div_tag = detail_soup.find('div', id='info')
            content = div_tag.text
            txt.delete(1.0, tk.END)
            s = '影片名:' + title + ' ' + content + '海报链接:' + src_url + '\n' + '视频链接:' + movie_source + '\n'
            try:
                txt.insert(tk.END, s)
            except AttributeError:
                print('信息保存出错一次')
            fp.write('影片名:' + title + ' ' + content + '海报链接:' + src_url + '视频链接:' + movie_source + '\n')
def main():
    """Fetch the Douban 'coming soon' listing and launch the Tk GUI.

    Publishes ``headers``/``a1_list`` and the GUI widgets
    (``root``/``txt``/``Label_movie``) as module globals for the button
    handlers to use.
    """
    global headers, mv_list, a1_list
    url = 'https://movie.douban.com/cinema/later/nanchang/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36 Edg/87.0.664.41'
        }
    try:
        page_text = requests.get(url=url, headers=headers).text
        print("成功进入豆瓣网")
    except requests.RequestException:
        print('访问失败')
        # Fix: the original fell through here and then crashed with a
        # NameError on the undefined `page_text`.
        return
    # Parse the listing page: one <div> per upcoming movie card.
    soup = BeautifulSoup(page_text, 'lxml')
    a1_list = soup.select('#showing-soon.tab-bd > div')

    # ---- GUI ----
    global Label_movie, root, txt
    root = tk.Tk()
    root.title("GUI的测试窗口")
    root.geometry("1300x768+150+0")
    # Video display area (left/top of the window).
    Label_movie = tk.Label(root)
    Label_movie.place(x=200, y=0)
    # Text box for scraped movie info and status messages.
    txt = tk.Text(root, width=200, height=200, font='华文新魏')
    txt.place(relx=0, rely=0.65)
    txt.insert(tk.END, "请选择电影")
    # One button per movie title; each handler scrapes and plays its trailer.
    rd1 = tk.Button(root, text="应承", font=("华文新魏"), command=select1)
    rd1.place(x=10, y=25)
    print(rd1['text'])
    rd2 = tk.Button(root, text='一秒钟', font=("华文新魏"), command=select2)
    rd2.place(x=10, y=125)
    rd3 = tk.Button(root, text='疯狂原始人2', font=("华文新魏"), command=select3)
    rd3.place(x=10, y=225)
    rd4 = tk.Button(root, text='日光之下', font=("华文新魏"), command=select4)
    rd4.place(x=10, y=325)
    rd5 = tk.Button(root, text='隐形人', font=("华文新魏"), command=select5)
    rd5.place(x=10, y=425)
    root.mainloop()
# Launch the GUI only when run as a script, not when imported.
if __name__=='__main__':
    main()

# --- Blog-post footer, preserved as comments so the script stays runnable ---
# 最后附上运行视频
# 声明:本文内容由互联网用户自发贡献自行上传,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。如果您发现有涉嫌版权的内容,欢迎进行举报,并提供相关证据,工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。