JXMovies/CMS/JXCMS.CMS.Movie/Spider/MovieSpider.cs
2020-02-16 22:11:24 +08:00

265 lines
13 KiB
C#
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Xml;
using FreeSql;
using JXCMS.CMS.Movie.Entity;
using JXCMS.CMS.Movie.Models;
using JXCMS.Core.Extensions;
using JXCMS.Core.Log;
using SufeiUtil;
using Log = Serilog.Log;
namespace JXCMS.CMS.Movie.Spider
{
public class MovieSpider
{
// Lookup lists filled once by the constructor and consulted by the spider methods.
private readonly List<WebSiteClassifyEntity> _webSiteClassifyEntities;
private readonly List<ActorEntity> _actorEntities;
private readonly List<DirectorEntity> _directorEntities;

// Backing field for Instance; stays null until the first access.
private static MovieSpider _instance;

/// <summary>
/// Shared spider instance, created on first access.
/// No locking is performed around the creation.
/// </summary>
public static MovieSpider Instance
{
    get
    {
        if (_instance == null)
        {
            _instance = new MovieSpider();
        }

        return _instance;
    }
}

/// <summary>
/// Private so the only way to obtain a spider is through <see cref="Instance"/>.
/// Materializes the classify, actor and director queries into in-memory lists.
/// </summary>
private MovieSpider()
{
    this._webSiteClassifyEntities = WebSiteClassifyEntity.Select.ToList();
    this._actorEntities = ActorEntity.Select.ToList();
    this._directorEntities = DirectorEntity.Select.ToList();
}
/// <summary>
/// Requests one page of the site's listing API and converts the XML response
/// into a <see cref="MovieListInfoModel"/>.
/// </summary>
/// <param name="webSiteEntity">Source site; its <c>ApiUrl</c> is requested and its <c>WebSiteName</c> appears in log entries.</param>
/// <param name="number">Page number sent as the <c>pg</c> query parameter.</param>
/// <returns>
/// The parsed page, or <c>null</c> when the HTTP status is not OK, the body is not
/// well-formed XML, or the document has no <c>/rss/list</c> element.
/// </returns>
public MovieListInfoModel GetMovieList(WebSiteEntity webSiteEntity, int number)
{
    var item = new HttpItem { URL = $"{webSiteEntity.ApiUrl}?pg={number}" };
    var result = new HttpHelper().GetHtml(item);
    if (result.StatusCode != HttpStatusCode.OK)
    {
        Log.Warning($"{{@name}} 获取api失败错误码{result.StatusCode}", webSiteEntity.WebSiteName);
        return null;
    }

    Log.Logger.Information($"{{@name}} 成功获取地址:{item.URL}", webSiteEntity.WebSiteName);

    var doc = new XmlDocument();
    try
    {
        doc.LoadXml(result.Html);
    }
    catch (XmlException ex)
    {
        // Callers already treat null as "fetch failed", so a broken body is reported the same way.
        Log.Warning(ex, "{@name} 返回内容不是有效的XML", webSiteEntity.WebSiteName);
        return null;
    }

    var listNode = doc.SelectSingleNode("/rss/list");
    if (listNode == null)
    {
        Log.Warning("{@name} 返回的XML中未找到list节点", webSiteEntity.WebSiteName);
        return null;
    }

    // Missing attributes become null strings instead of throwing.
    var movieListInfoModel = new MovieListInfoModel
    {
        CurrentPage = AttributeValue(listNode, "page"),
        PageCount = AttributeValue(listNode, "pagecount"),
        PageSize = AttributeValue(listNode, "pagesize"),
        RecordCount = AttributeValue(listNode, "recordcount")
    };

    // Relative XPath: only <video> children of the <list> node, the same elements
    // that "/rss/list/video" addresses. ("//video" would restart from the document root.)
    foreach (XmlNode video in listNode.SelectNodes("video"))
    {
        var id = ChildText(video, "id");
        if (string.IsNullOrEmpty(id) || !DateTime.TryParse(ChildText(video, "last"), out var lastUpdate))
        {
            // One malformed entry must not discard the rest of the page.
            Log.Logger.Warning("{@name} 跳过缺少id或更新时间无效的条目", webSiteEntity.WebSiteName);
            continue;
        }

        movieListInfoModel.MovieInfoModels.Add(new MovieInfoModel
        {
            LastUpdateTime = lastUpdate,
            Name = ChildText(video, "name"),
            TypeId = ChildText(video, "tid"),
            Id = id,
            TypeName = ChildText(video, "type")
        });
    }

    return movieListInfoModel;

    // Null-safe accessors for optional attributes / child elements.
    static string AttributeValue(XmlNode node, string attributeName) => node.Attributes?[attributeName]?.Value;
    static string ChildText(XmlNode node, string childName) => node.SelectSingleNode(childName)?.InnerText;
}
/// <summary>
/// Requests the detail records (<c>ac=videolist</c>) for the given ids, stores movies that are
/// not yet known (matched by name + director name) together with their director and actors,
/// and inserts the m3u8 episode entries for this site.
/// </summary>
/// <param name="webSiteEntity">Source site; supplies <c>ApiUrl</c>, <c>Id</c> and the name used in logs.</param>
/// <param name="idNameTypes">Entries to fetch; <c>id</c> is sent to the API and <c>typeId</c> is parsed into the new movie's <c>ClassifyId</c>.</param>
/// <returns>
/// The greatest <c>LastUpdate</c> among movies newly created by this call, or
/// <see cref="DateTime.MinValue"/> when nothing was created, the HTTP status is not OK,
/// or the body is not well-formed XML.
/// </returns>
public DateTime GetMovieInfos(WebSiteEntity webSiteEntity, List<(string id, string name, string typeId)> idNameTypes)
{
    var item = new HttpItem
    {
        URL = $"{webSiteEntity.ApiUrl}?ac=videolist&ids={string.Join(",", idNameTypes.Select(x => x.id))}"
    };
    var result = new HttpHelper().GetHtml(item);
    if (result.StatusCode != HttpStatusCode.OK)
    {
        return DateTime.MinValue;
    }

    Log.Logger.Information($"{{@name}} 成功获取地址:{item.URL}", webSiteEntity.WebSiteName);

    var doc = new XmlDocument();
    try
    {
        doc.LoadXml(result.Html);
    }
    catch (XmlException ex)
    {
        // Same outcome as a failed request: nothing was stored for this batch.
        Log.Warning(ex, "{@name} 返回内容不是有效的XML", webSiteEntity.WebSiteName);
        return DateTime.MinValue;
    }

    DateTime dt = DateTime.MinValue;
    foreach (XmlNode video in doc.SelectNodes("/rss/list/video"))
    {
        var name = video.SelectSingleNode("name")?.InnerText ?? "";
        var director = video.SelectSingleNode("director")?.InnerText ?? "";
        if (director.IsNullOrEmpty())
        {
            director = "未知";
        }

        var movieEntity = MovieEntity.Select.Include(x => x.DirectorEntity)
            .Where(x => x.Name == name && x.DirectorEntity.DirectorName == director).First();
        if (movieEntity == null)
        {
            movieEntity = CreateMovie(video, name, director, idNameTypes);
            if (movieEntity.LastUpdate > dt)
            {
                dt = movieEntity.LastUpdate;
            }
        }

        var dds = video.SelectNodes("dl/dd");
        // Most recently inserted episode of this movie for this site, if any.
        var movieListEntity = MovieListEntity.Where(x => x.MovieId == movieEntity.Id && x.WebSiteId == webSiteEntity.Id)
            .OrderByDescending(x => x.Id).First();
        if (movieListEntity == null)
        {
            InsertEpisodeLists(dds, movieEntity, webSiteEntity);
        }
        else
        {
            AppendNewEpisodes(dds, movieListEntity, movieEntity, webSiteEntity);
        }
    }

    return dt;
}

// Builds and saves a new movie (plus director and actor links) from a <video> node.
private MovieEntity CreateMovie(XmlNode video, string name, string director, List<(string id, string name, string typeId)> idNameTypes)
{
    var videoId = video.SelectSingleNode("id").InnerText;
    var year = ChildTextOrDefault(video, "year", "未知");

    var movieEntity = new MovieEntity();
    movieEntity.Area = ChildTextOrDefault(video, "area", "未知");
    movieEntity.Des = ChildTextOrDefault(video, "des", "暂无介绍");
    movieEntity.Lang = ChildTextOrDefault(video, "lang", "未知");
    movieEntity.Name = name;
    // Fixed: the poster used to be read from the "lang" node whenever "pic" was non-empty.
    movieEntity.Pic = ChildTextOrDefault(video, "pic", "/img/cover.jpg");
    movieEntity.Year = year == "0" ? "未知" : year;
    movieEntity.ClassifyId = int.Parse(idNameTypes.First(x => x.id == videoId).typeId);
    movieEntity.DirectorId = FindOrCreateDirector(director).Id;
    movieEntity.LastUpdate = DateTime.Parse(video.SelectSingleNode("last").InnerText);
    movieEntity.Save();

    LinkActors(video, movieEntity);
    return movieEntity;
}

// Returns the cached director with this name, saving and caching a new one when absent.
private DirectorEntity FindOrCreateDirector(string directorName)
{
    var known = _directorEntities.FirstOrDefault(x => x.DirectorName == directorName);
    if (known != null)
    {
        return known;
    }

    var created = new DirectorEntity { DirectorName = directorName };
    created.Save();
    // Cache it so the next video by the same director does not insert a duplicate row.
    _directorEntities.Add(created);
    return created;
}

// Saves one MovieActorEntity per distinct actor name of the video, creating unknown actors.
private void LinkActors(XmlNode video, MovieEntity movieEntity)
{
    var actorText = video.SelectSingleNode("actor")?.InnerText ?? "";
    // ", " style separators would otherwise yield empty names; repeats would yield duplicate links.
    var actorNames = actorText
        .Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries)
        .Distinct()
        .ToList();
    if (actorNames.Count == 0)
    {
        actorNames.Add("未知");
    }

    foreach (var actorName in actorNames)
    {
        var actorEntity = _actorEntities.FirstOrDefault(x => x.ActorName == actorName);
        if (actorEntity == null)
        {
            actorEntity = new ActorEntity { ActorName = actorName };
            actorEntity.Save();
            // Cache it so later videos reuse this row instead of inserting it again.
            _actorEntities.Add(actorEntity);
        }

        new MovieActorEntity
        {
            ActorId = actorEntity.Id,
            MovieId = movieEntity.Id
        }.Save();
    }
}

// First import for this movie/site: bulk-inserts every <dd> list whose first play URL ends with "m3u8".
private static void InsertEpisodeLists(XmlNodeList dds, MovieEntity movieEntity, WebSiteEntity webSiteEntity)
{
    foreach (XmlNode dd in dds)
    {
        // Entries are separated by '#'; each entry is name$url[$type].
        var entries = dd.InnerText.Split('#');
        var firstParts = entries[0].Split('$');
        if (firstParts.Length < 2 || !firstParts[1].EndsWith("m3u8", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        Log.Logger.Information("{@name} 获取到m3u8列表", webSiteEntity.WebSiteName);

        // When an entry carries no third part, fall back to the <dd> flag attribute,
        // which is what later runs compare the stored Type against.
        var flag = dd.Attributes?["flag"]?.Value ?? "";
        var movieListEntities = new List<MovieListEntity>();
        foreach (var entry in entries)
        {
            var sp = entry.Split('$');
            if (sp.Length < 2)
            {
                // No play URL: nothing usable to store.
                continue;
            }

            movieListEntities.Add(new MovieListEntity
            {
                Name = sp[0],
                Type = sp.Length > 2 ? sp[2] : flag,
                MovieId = movieEntity.Id,
                PlayUrl = sp[1],
                WebSiteId = webSiteEntity.Id
            });
        }

        BaseEntity.Orm.Insert(movieListEntities).ExecuteAffrows();
        Log.Logger.Information($"{{@name}} 插入剧集{movieEntity.Name}成功!", webSiteEntity.WebSiteName);
    }
}

// Follow-up import: saves only the entries that come after the last stored episode in the matching <dd>.
private static void AppendNewEpisodes(XmlNodeList dds, MovieListEntity movieListEntity, MovieEntity movieEntity, WebSiteEntity webSiteEntity)
{
    var node = dds.Cast<XmlNode>().FirstOrDefault(x => x.Attributes?["flag"]?.Value == movieListEntity.Type);
    if (node == null)
    {
        Log.Logger.Error("{@name} 未找到指定的剧集列表!", webSiteEntity.WebSiteName);
        // Fixed: execution used to fall through and dereference the null node.
        return;
    }

    bool flag = false;
    foreach (var number in node.InnerText.Split('#'))
    {
        var sp = number.Split('$');
        if (sp[0] == movieListEntity.Name)
        {
            // Everything after the last stored episode is new.
            flag = true;
            continue;
        }

        if (!flag || sp.Length < 2)
        {
            continue;
        }

        new MovieListEntity()
        {
            Name = sp[0],
            MovieId = movieEntity.Id,
            PlayUrl = sp[1],
            Type = sp.Length > 2 ? sp[2] : movieListEntity.Type,
            WebSiteId = webSiteEntity.Id
        }.Save();
        Log.Logger.Information($"{{@name}} 插入剧集{movieEntity.Name}-{sp[0]}成功!", webSiteEntity.WebSiteName);
    }
}

// Inner text of the named child, or the fallback when the child is missing or empty.
private static string ChildTextOrDefault(XmlNode parent, string childName, string fallback)
{
    var text = parent.SelectSingleNode(childName)?.InnerText;
    return string.IsNullOrEmpty(text) ? fallback : text;
}
/// <summary>
/// Crawls the site's listing pages in order, hands every entry with a mapped category to
/// <see cref="GetMovieInfos"/>, and stops at the first entry older than
/// <c>webSiteEntity.LatestMoveTime</c> or after the last page. When the crawl finishes,
/// <c>LatestMoveTime</c> is advanced to the newest timestamp reported by
/// <see cref="GetMovieInfos"/> and saved.
/// </summary>
/// <param name="webSiteEntity">Site to crawl.</param>
/// <param name="reGetAll">
/// NOTE(review): this flag is not read anywhere in the method; presumably it was meant to
/// bypass the <c>LatestMoveTime</c> cut-off — confirm the intent before wiring it up.
/// </param>
public void StartSpider(WebSiteEntity webSiteEntity, bool reGetAll = false)
{
    var model = GetMovieList(webSiteEntity, 1);
    if (model == null)
    {
        Log.Logger.Error("{@name} 获取信息失败,采集结束!", webSiteEntity.WebSiteName);
        return;
    }

    if (!int.TryParse(model.PageCount, out int count))
    {
        Log.Logger.Error("{@name} 获取总页数失败,采集结束!", webSiteEntity.WebSiteName);
        return;
    }

    DateTime dt = DateTime.MinValue;
    int number = 1;
    while (true)
    {
        // The page-1 model fetched above is processed here instead of being downloaded again.
        bool reachedKnown = false;
        var idNameTypes = new List<(string id, string name, string typeId)>();
        foreach (var modelMovieInfoModel in model.MovieInfoModels)
        {
            if (webSiteEntity.LatestMoveTime > modelMovieInfoModel.LastUpdateTime)
            {
                Log.Logger.Information("{@name} 已检查到最后一条", webSiteEntity.WebSiteName);
                reachedKnown = true;
                break;
            }

            if (!_webSiteClassifyEntities.Any(x => x.WebSiteId == webSiteEntity.Id && x.TypeId.ToString() == modelMovieInfoModel.TypeId))
            {
                Log.Logger.Warning($"{{@name}} 未找到与{modelMovieInfoModel.TypeName}对应的分类,自动跳过", webSiteEntity.WebSiteName);
                continue;
            }

            idNameTypes.Add((modelMovieInfoModel.Id, modelMovieInfoModel.Name, modelMovieInfoModel.TypeId));
        }

        if (idNameTypes.Count > 0)
        {
            // Keep the maximum over all pages; a later page must not overwrite a newer timestamp.
            var batchNewest = GetMovieInfos(webSiteEntity, idNameTypes);
            if (batchNewest > dt)
            {
                dt = batchNewest;
            }
        }

        // A page that only held unmapped categories is not the end of the crawl;
        // only the cut-off entry or the last page is.
        if (reachedKnown || ++number > count)
        {
            break;
        }

        model = GetMovieList(webSiteEntity, number);
        if (model == null)
        {
            // Deliberately not saving LatestMoveTime: the remaining pages were never processed.
            Log.Logger.Error("{@name} 获取信息失败,采集结束!", webSiteEntity.WebSiteName);
            return;
        }
    }

    // Reached for both the cut-off and the last-page exit, so progress is always persisted.
    if (webSiteEntity.LatestMoveTime < dt)
    {
        webSiteEntity.LatestMoveTime = dt;
        webSiteEntity.Save();
    }

    Log.Information("{@name} 采集完成!", webSiteEntity.WebSiteName);
}
}
}