using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Xml;
using FreeSql;
using JXCMS.CMS.Movie.Entity;
using JXCMS.CMS.Movie.Models;
using JXCMS.Core.Extensions;
using JXCMS.Core.Log;
using SufeiUtil;
using Log = Serilog.Log;

namespace JXCMS.CMS.Movie.Spider
{
    /// <summary>
    /// Downloads movie listings and details from a web site's XML API and stores
    /// movies, directors, actors and episode lists through the entity classes.
    /// </summary>
    public class MovieSpider
    {
        // Placeholder stored when the API leaves a descriptive field empty.
        private const string Unknown = "未知";

        // Snapshots loaded once in the constructor; directors and actors created
        // later are appended so that they are not inserted a second time.
        private readonly List<WebSiteClassifyEntity> _webSiteClassifyEntities;
        private readonly List<ActorEntity> _actorEntities;
        private readonly List<DirectorEntity> _directorEntities;

        private static MovieSpider _instance;

        /// <summary>
        /// Shared instance, created on first access. No locking is performed.
        /// </summary>
        public static MovieSpider Instance => _instance ??= new MovieSpider();

        private MovieSpider()
        {
            _webSiteClassifyEntities = WebSiteClassifyEntity.Select.ToList();
            _actorEntities = ActorEntity.Select.ToList();
            _directorEntities = DirectorEntity.Select.ToList();
        }

        /// <summary>
        /// Requests one page of the listing (<c>{ApiUrl}?pg={number}</c>) and converts
        /// the <c>/rss/list</c> element into a <see cref="MovieListInfoModel"/>.
        /// </summary>
        /// <param name="webSiteEntity">Site whose API is queried.</param>
        /// <param name="number">Page number sent as the <c>pg</c> query parameter.</param>
        /// <returns>
        /// The parsed page, or <c>null</c> when the HTTP status is not 200 or the
        /// response has no <c>/rss/list</c> element.
        /// </returns>
        public MovieListInfoModel GetMovieList(WebSiteEntity webSiteEntity, int number)
        {
            var item = new HttpItem { URL = $"{webSiteEntity.ApiUrl}?pg={number}" };
            var result = new HttpHelper().GetHtml(item);
            if (result.StatusCode != HttpStatusCode.OK)
            {
                Log.Warning($"{{@name}} 获取api失败,错误码{result.StatusCode}", webSiteEntity.WebSiteName);
                return null;
            }

            Log.Logger.Information($"{{@name}} 成功获取地址:{item.URL}", webSiteEntity.WebSiteName);
            var doc = new XmlDocument();
            doc.LoadXml(result.Html);

            var listNode = doc.SelectSingleNode("/rss/list");
            if (listNode?.Attributes == null)
            {
                // Treated like a failed request so the caller's null handling applies.
                Log.Warning("{@name} 返回内容中未找到/rss/list节点", webSiteEntity.WebSiteName);
                return null;
            }

            // A missing attribute yields null; the caller validates PageCount with TryParse.
            var movieListInfoModel = new MovieListInfoModel
            {
                CurrentPage = listNode.Attributes["page"]?.Value,
                PageCount = listNode.Attributes["pagecount"]?.Value,
                PageSize = listNode.Attributes["pagesize"]?.Value,
                RecordCount = listNode.Attributes["recordcount"]?.Value
            };

            foreach (XmlNode video in listNode.SelectNodes("//video"))
            {
                movieListInfoModel.MovieInfoModels.Add(new MovieInfoModel
                {
                    LastUpdateTime = DateTime.Parse(NodeText(video, "last")),
                    Name = NodeText(video, "name"),
                    TypeId = NodeText(video, "tid"),
                    Id = NodeText(video, "id"),
                    TypeName = NodeText(video, "type")
                });
            }

            return movieListInfoModel;
        }

        /// <summary>
        /// Requests the details of the given videos (<c>?ac=videolist&amp;ids=...</c>),
        /// inserts movies that are not stored yet and adds missing episodes.
        /// </summary>
        /// <param name="webSiteEntity">Site whose API is queried.</param>
        /// <param name="idNameTypes">
        /// Videos to fetch; <c>id</c> goes into the request and <c>typeId</c> is parsed
        /// into the new movie's <c>ClassifyId</c>.
        /// </param>
        /// <returns>
        /// The greatest <c>LastUpdate</c> among movies inserted by this call, or
        /// <see cref="DateTime.MinValue"/> when none was inserted or the request failed.
        /// </returns>
        public DateTime GetMovieInfos(WebSiteEntity webSiteEntity, List<(string id, string name, string typeId)> idNameTypes)
        {
            var item = new HttpItem
            {
                URL = $"{webSiteEntity.ApiUrl}?ac=videolist&ids={string.Join(",", idNameTypes.Select(x => x.id))}"
            };
            var result = new HttpHelper().GetHtml(item);
            if (result.StatusCode != HttpStatusCode.OK)
            {
                return DateTime.MinValue;
            }

            DateTime dt = DateTime.MinValue;
            Log.Logger.Information($"{{@name}} 成功获取地址:{item.URL}", webSiteEntity.WebSiteName);
            var doc = new XmlDocument();
            doc.LoadXml(result.Html);

            foreach (XmlNode video in doc.SelectNodes("/rss/list/video"))
            {
                var name = NodeText(video, "name");
                var director = NodeText(video, "director");
                if (director.IsNullOrEmpty())
                {
                    director = Unknown;
                }

                // A movie is identified by its name together with its director's name.
                var movieEntity = MovieEntity.Select.Include(x => x.DirectorEntity)
                    .Where(x => x.Name == name && x.DirectorEntity.DirectorName == director).First();
                if (movieEntity == null)
                {
                    movieEntity = CreateMovie(video, name, director, idNameTypes);

                    // NOTE(review): only newly inserted movies move the returned time;
                    // existing movies that merely gained episodes do not - confirm intent.
                    if (movieEntity.LastUpdate > dt)
                    {
                        dt = movieEntity.LastUpdate;
                    }
                }

                SyncEpisodes(video, movieEntity, webSiteEntity);
            }

            return dt;
        }

        /// <summary>
        /// Walks the listing pages of a site and imports every video that has a
        /// matching classification, then stores the newest imported update time in
        /// <c>LatestMoveTime</c>.
        /// </summary>
        /// <param name="webSiteEntity">Site to crawl.</param>
        /// <param name="reGetAll">
        /// When <c>true</c> the <c>LatestMoveTime</c> cut-off is ignored and all pages
        /// are processed.
        /// </param>
        public void StartSpider(WebSiteEntity webSiteEntity, bool reGetAll = false)
        {
            var model = GetMovieList(webSiteEntity, 1);
            if (model == null)
            {
                Log.Logger.Error("{@name} 获取信息失败,采集结束!", webSiteEntity.WebSiteName);
                return;
            }

            if (!int.TryParse(model.PageCount, out int count))
            {
                Log.Logger.Error("{@name} 获取总页数失败,采集结束!", webSiteEntity.WebSiteName);
                return;
            }

            DateTime latest = DateTime.MinValue;
            int number = 1;
            do
            {
                // Page 1 was already downloaded above; only later pages need a request.
                if (number > 1)
                {
                    model = GetMovieList(webSiteEntity, number);
                    if (model == null)
                    {
                        // Abort without moving LatestMoveTime so unprocessed pages are retried.
                        Log.Logger.Error("{@name} 获取信息失败,采集结束!", webSiteEntity.WebSiteName);
                        return;
                    }
                }

                number++;

                var idNameTypes = new List<(string id, string name, string typeId)>();
                bool reachedKnown = false;
                foreach (var info in model.MovieInfoModels)
                {
                    // Presumably the listing is ordered newest first, so the first entry
                    // older than the stored cut-off ends the scan - verify against the API.
                    if (!reGetAll && webSiteEntity.LatestMoveTime > info.LastUpdateTime)
                    {
                        Log.Logger.Information("{@name} 已检查到最后一条", webSiteEntity.WebSiteName);
                        reachedKnown = true;
                        break;
                    }

                    if (!_webSiteClassifyEntities.Any(x =>
                        x.WebSiteId == webSiteEntity.Id && x.TypeId.ToString() == info.TypeId))
                    {
                        Log.Logger.Warning($"{{@name}} 未找到与{info.TypeName}对应的分类,自动跳过", webSiteEntity.WebSiteName);
                        continue;
                    }

                    idNameTypes.Add((info.Id, info.Name, info.TypeId));
                }

                if (idNameTypes.Count > 0)
                {
                    // Keep the maximum over all pages rather than the last page's value.
                    var pageLatest = GetMovieInfos(webSiteEntity, idNameTypes);
                    if (pageLatest > latest)
                    {
                        latest = pageLatest;
                    }
                }

                // A page containing only unclassified videos must not end the crawl;
                // only the cut-off or the last page does.
                if (reachedKnown)
                {
                    break;
                }
            } while (number <= count);

            if (webSiteEntity.LatestMoveTime < latest)
            {
                webSiteEntity.LatestMoveTime = latest;
                webSiteEntity.Save();
            }

            Log.Information("{@name} 采集完成!", webSiteEntity.WebSiteName);
        }

        /// <summary>Builds, saves and returns a new movie from a <c>video</c> element, including its actor links.</summary>
        private MovieEntity CreateMovie(XmlNode video, string name, string director,
            List<(string id, string name, string typeId)> idNameTypes)
        {
            var videoId = NodeText(video, "id");
            var year = NodeText(video, "year");

            var movieEntity = new MovieEntity
            {
                Area = TextOrDefault(video, "area", Unknown),
                Des = TextOrDefault(video, "des", "暂无介绍"),
                Lang = TextOrDefault(video, "lang", Unknown),
                Name = name,
                // The cover comes from the "pic" node (it was previously read from "lang").
                Pic = TextOrDefault(video, "pic", "/img/cover.jpg"),
                Year = year == "" || year == "0" ? Unknown : year,
                ClassifyId = int.Parse(idNameTypes.First(x => x.id == videoId).typeId),
                DirectorId = GetOrCreateDirector(director).Id,
                LastUpdate = DateTime.Parse(NodeText(video, "last"))
            };
            movieEntity.Save();

            LinkActors(video, movieEntity);
            return movieEntity;
        }

        /// <summary>Returns the cached director with the given name, saving and caching a new one when absent.</summary>
        private DirectorEntity GetOrCreateDirector(string directorName)
        {
            var directorEntity = _directorEntities.FirstOrDefault(x => x.DirectorName == directorName);
            if (directorEntity == null)
            {
                directorEntity = new DirectorEntity { DirectorName = directorName };
                directorEntity.Save();
                _directorEntities.Add(directorEntity);
            }

            return directorEntity;
        }

        /// <summary>Links every actor named in the <c>actor</c> node to the movie, creating unknown actors.</summary>
        private void LinkActors(XmlNode video, MovieEntity movieEntity)
        {
            var actorText = NodeText(video, "actor");
            if (actorText.IsNullOrEmpty())
            {
                actorText = Unknown;
            }

            // Names are separated by commas and/or spaces; drop the empty pieces that
            // ", " produces as well as repeated names.
            var actorNames = actorText
                .Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries)
                .Distinct()
                .ToList();
            if (actorNames.Count == 0)
            {
                actorNames.Add(Unknown);
            }

            foreach (var actorName in actorNames)
            {
                var actorEntity = _actorEntities.FirstOrDefault(x => x.ActorName == actorName);
                if (actorEntity == null)
                {
                    actorEntity = new ActorEntity { ActorName = actorName };
                    actorEntity.Save();
                    _actorEntities.Add(actorEntity);
                }

                new MovieActorEntity
                {
                    ActorId = actorEntity.Id,
                    MovieId = movieEntity.Id
                }.Save();
            }
        }

        /// <summary>
        /// Stores the episodes found under <c>dl/dd</c>: all of them when the movie has
        /// none for this site yet, otherwise only those listed after the newest stored one.
        /// </summary>
        private static void SyncEpisodes(XmlNode video, MovieEntity movieEntity, WebSiteEntity webSiteEntity)
        {
            var dds = video.SelectNodes("dl/dd");
            var movieListEntity = MovieListEntity
                .Where(x => x.MovieId == movieEntity.Id && x.WebSiteId == webSiteEntity.Id)
                .OrderByDescending(x => x.Id).First();

            if (movieListEntity == null)
            {
                InsertAllEpisodes(dds, movieEntity, webSiteEntity);
                return;
            }

            var node = dds.Cast<XmlNode>()
                .FirstOrDefault(x => x.Attributes?["flag"]?.Value == movieListEntity.Type);
            if (node == null)
            {
                // Nothing to compare against; stop here instead of dereferencing null.
                Log.Logger.Error("{@name} 未找到指定的剧集列表!", webSiteEntity.WebSiteName);
                return;
            }

            bool found = false;
            foreach (var segment in node.InnerText.Split('#'))
            {
                var sp = segment.Split('$');
                if (sp[0] == movieListEntity.Name)
                {
                    found = true;
                    continue;
                }

                // Skip everything up to the stored episode, and malformed segments.
                if (!found || sp.Length < 3)
                {
                    continue;
                }

                new MovieListEntity
                {
                    Name = sp[0],
                    MovieId = movieEntity.Id,
                    PlayUrl = sp[1],
                    Type = sp[2],
                    WebSiteId = webSiteEntity.Id
                }.Save();
                Log.Logger.Information($"{{@name}} 插入剧集{movieEntity.Name}-{sp[0]}成功!", webSiteEntity.WebSiteName);
            }
        }

        /// <summary>Bulk-inserts the episodes of every <c>dd</c> whose first play URL ends with "m3u8".</summary>
        private static void InsertAllEpisodes(XmlNodeList dds, MovieEntity movieEntity, WebSiteEntity webSiteEntity)
        {
            foreach (XmlNode dd in dds)
            {
                // Each dd holds '#'-separated episodes shaped as name$url$type.
                var segments = dd.InnerText.Split('#');
                var first = segments[0].Split('$');
                if (first.Length < 2 || !first[1].EndsWith("m3u8", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                Log.Logger.Information("{@name} 获取到m3u8列表!", webSiteEntity.WebSiteName);

                List<MovieListEntity> episodes = segments
                    .Select(x => x.Split('$'))
                    .Where(sp => sp.Length >= 3)
                    .Select(sp => new MovieListEntity
                    {
                        Name = sp[0],
                        Type = sp[2],
                        MovieId = movieEntity.Id,
                        PlayUrl = sp[1],
                        WebSiteId = webSiteEntity.Id
                    })
                    .ToList();
                if (episodes.Count == 0)
                {
                    continue;
                }

                BaseEntity.Orm.Insert<MovieListEntity>(episodes).ExecuteAffrows();
                Log.Logger.Information($"{{@name}} 插入剧集{movieEntity.Name}成功!", webSiteEntity.WebSiteName);
            }
        }

        /// <summary>Returns the inner text of the named child node, or an empty string when the child is missing.</summary>
        private static string NodeText(XmlNode parent, string childName)
        {
            return parent.SelectSingleNode(childName)?.InnerText ?? string.Empty;
        }

        /// <summary>Returns the inner text of the named child node, or <paramref name="fallback"/> when it is missing or empty.</summary>
        private static string TextOrDefault(XmlNode parent, string childName, string fallback)
        {
            var text = NodeText(parent, childName);
            return text == "" ? fallback : text;
        }
    }
}