265 lines
13 KiB
C#
265 lines
13 KiB
C#
|
using System;
|
|||
|
using System.Collections.Generic;
|
|||
|
using System.Linq;
|
|||
|
using System.Net;
|
|||
|
using System.Xml;
|
|||
|
using FreeSql;
|
|||
|
using JXCMS.CMS.Movie.Entity;
|
|||
|
using JXCMS.CMS.Movie.Models;
|
|||
|
using JXCMS.Core.Extensions;
|
|||
|
using JXCMS.Core.Log;
|
|||
|
using SufeiUtil;
|
|||
|
using Log = Serilog.Log;
|
|||
|
|
|||
|
namespace JXCMS.CMS.Movie.Spider
|
|||
|
{
|
|||
|
public class MovieSpider
|
|||
|
{
|
|||
|
|
|||
|
/// <summary>Site/category mapping rows, read from the database in the constructor.</summary>
private readonly List<WebSiteClassifyEntity> _webSiteClassifyEntities;

/// <summary>Actor rows, read from the database in the constructor.</summary>
private readonly List<ActorEntity> _actorEntities;

/// <summary>Director rows, read from the database in the constructor.</summary>
private readonly List<DirectorEntity> _directorEntities;

/// <summary>Backing field for <see cref="Instance"/>; stays null until first access.</summary>
private static MovieSpider _instance;

/// <summary>
/// Shared spider instance, created on first access.
/// The creation is a plain null check without any synchronization.
/// </summary>
public static MovieSpider Instance
{
    get
    {
        if (_instance == null)
        {
            _instance = new MovieSpider();
        }

        return _instance;
    }
}

/// <summary>
/// Loads the classify, actor and director tables into memory.
/// Private so that instances are only obtained through <see cref="Instance"/>.
/// </summary>
private MovieSpider()
{
    _webSiteClassifyEntities = WebSiteClassifyEntity.Select.ToList();
    _actorEntities = ActorEntity.Select.ToList();
    _directorEntities = DirectorEntity.Select.ToList();
}
|
|||
|
|
|||
|
/// <summary>
/// Downloads one page of the site's listing API and parses it into a <see cref="MovieListInfoModel"/>.
/// </summary>
/// <param name="webSiteEntity">Site to query; its <c>ApiUrl</c> is requested and its <c>WebSiteName</c> appears in log output.</param>
/// <param name="number">Page number, sent as the <c>pg</c> query parameter.</param>
/// <returns>
/// The paging attributes of <c>/rss/list</c> plus one <see cref="MovieInfoModel"/> per child <c>video</c> element,
/// or <c>null</c> when the response is not HTTP 200, is not well-formed XML, or has no <c>/rss/list</c> element.
/// </returns>
public MovieListInfoModel GetMovieList(WebSiteEntity webSiteEntity, int number)
{
    var item = new HttpItem { URL = $"{webSiteEntity.ApiUrl}?pg={number}" };
    var result = new HttpHelper().GetHtml(item);
    if (result.StatusCode != HttpStatusCode.OK)
    {
        Log.Warning("{@name} 获取api失败,错误码{status}", webSiteEntity.WebSiteName, result.StatusCode);
        return null;
    }

    // Runtime values are passed as template arguments instead of being interpolated into the
    // template, so braces inside a URL cannot be mis-read as placeholders. ":l" keeps the
    // rendered text unquoted, matching the previous output.
    Log.Logger.Information("{@name} 成功获取地址:{url:l}", webSiteEntity.WebSiteName, item.URL);

    var doc = new XmlDocument();
    try
    {
        doc.LoadXml(result.Html);
    }
    catch (XmlException ex)
    {
        // A 200 response with a non-XML body is reported the same way as any other failed fetch.
        Log.Logger.Error(ex, "{@name} 返回内容不是有效的XML:{url:l}", webSiteEntity.WebSiteName, item.URL);
        return null;
    }

    var listNode = doc.SelectSingleNode("/rss/list");
    if (listNode == null)
    {
        Log.Logger.Error("{@name} 返回内容缺少 /rss/list 节点:{url:l}", webSiteEntity.WebSiteName, item.URL);
        return null;
    }

    // Null-tolerant accessors: a missing attribute or child element yields null instead of throwing.
    string ListAttribute(string attributeName) => listNode.Attributes?[attributeName]?.Value;
    string ChildValue(XmlNode parent, string childName) => parent.SelectSingleNode(childName)?.InnerText;

    var movieListInfoModel = new MovieListInfoModel
    {
        CurrentPage = ListAttribute("page"),
        PageCount = ListAttribute("pagecount"),
        PageSize = ListAttribute("pagesize"),
        RecordCount = ListAttribute("recordcount")
    };

    // "video" is evaluated relative to listNode. The previous "//video" is an absolute path
    // that matches every <video> element in the document regardless of the context node.
    foreach (XmlNode video in listNode.SelectNodes("video"))
    {
        var movieName = ChildValue(video, "name");
        if (!DateTime.TryParse(ChildValue(video, "last"), out DateTime lastUpdateTime))
        {
            // Without a usable timestamp the entry cannot be compared against the site's
            // last-crawled time, so it is skipped rather than aborting the whole page.
            Log.Logger.Warning("{@name} 跳过更新时间无效的条目:{movie:l}", webSiteEntity.WebSiteName, movieName);
            continue;
        }

        movieListInfoModel.MovieInfoModels.Add(new MovieInfoModel
        {
            LastUpdateTime = lastUpdateTime,
            Name = movieName,
            TypeId = ChildValue(video, "tid"),
            Id = ChildValue(video, "id"),
            TypeName = ChildValue(video, "type")
        });
    }

    return movieListInfoModel;
}
|
|||
|
|
|||
|
/// <summary>
/// Downloads the detail records for the given remote ids and stores them: movies that are not yet
/// known (matched by name and director name) are created together with their director and actor
/// links, and the episode list of every returned movie is inserted or extended for this site.
/// </summary>
/// <param name="webSiteEntity">Site to query; its <c>ApiUrl</c>, <c>Id</c> and <c>WebSiteName</c> are used.</param>
/// <param name="idNameTypes">
/// Remote id, name and remote type id of each video to fetch. The ids are joined into the
/// <c>ids</c> query parameter; the type id is parsed as the new movie's <c>ClassifyId</c>.
/// </param>
/// <returns>
/// The greatest <c>LastUpdate</c> among the movies newly created by this call, or
/// <see cref="DateTime.MinValue"/> when none was created, the request did not return HTTP 200,
/// or the body was not well-formed XML.
/// </returns>
public DateTime GetMovieInfos(WebSiteEntity webSiteEntity, List<(string id, string name, string typeId)> idNameTypes)
{
    var item = new HttpItem
    {
        URL = $"{webSiteEntity.ApiUrl}?ac=videolist&ids={string.Join(",", idNameTypes.Select(x => x.id))}"
    };
    var result = new HttpHelper().GetHtml(item);
    if (result.StatusCode != HttpStatusCode.OK)
    {
        Log.Warning("{@name} 获取api失败,错误码{status}", webSiteEntity.WebSiteName, result.StatusCode);
        return DateTime.MinValue;
    }

    // Values go in as template arguments (":l" = unquoted) so braces in data are never parsed
    // as template placeholders.
    Log.Logger.Information("{@name} 成功获取地址:{url:l}", webSiteEntity.WebSiteName, item.URL);

    var doc = new XmlDocument();
    try
    {
        doc.LoadXml(result.Html);
    }
    catch (XmlException ex)
    {
        Log.Logger.Error(ex, "{@name} 返回内容不是有效的XML:{url:l}", webSiteEntity.WebSiteName, item.URL);
        return DateTime.MinValue;
    }

    // NOTE(review): as in the original, only newly created movies advance this value; movies that
    // already exist and merely receive new episodes do not. Confirm that this is intended.
    DateTime latest = DateTime.MinValue;

    foreach (XmlNode video in doc.SelectNodes("/rss/list/video"))
    {
        var name = ChildText(video, "name", null);
        if (string.IsNullOrEmpty(name))
        {
            Log.Logger.Warning("{@name} 跳过缺少名称的条目", webSiteEntity.WebSiteName);
            continue;
        }

        var director = ChildText(video, "director", "未知");

        var movieEntity = MovieEntity.Select.Include(x => x.DirectorEntity)
            .Where(x => x.Name == name && x.DirectorEntity.DirectorName == director).First();
        if (movieEntity == null)
        {
            movieEntity = CreateMovie(webSiteEntity, video, name, director, idNameTypes);
            if (movieEntity == null)
            {
                continue;
            }

            if (movieEntity.LastUpdate > latest)
            {
                latest = movieEntity.LastUpdate;
            }
        }

        SaveEpisodes(webSiteEntity, movieEntity, video.SelectNodes("dl/dd"));
    }

    return latest;
}

/// <summary>Returns the inner text of a child element, or <paramref name="fallback"/> when the element is missing or empty.</summary>
private static string ChildText(XmlNode parent, string childName, string fallback)
{
    var text = parent.SelectSingleNode(childName)?.InnerText;
    return string.IsNullOrEmpty(text) ? fallback : text;
}

/// <summary>
/// Creates and saves a movie from a <c>video</c> element, including its director and actor links.
/// Returns null (after logging) when the remote type id or the update time cannot be parsed;
/// both are checked before anything is written.
/// </summary>
private MovieEntity CreateMovie(WebSiteEntity webSiteEntity, XmlNode video, string name, string director,
    List<(string id, string name, string typeId)> idNameTypes)
{
    var remoteId = ChildText(video, "id", null);
    var typeId = idNameTypes.FirstOrDefault(x => x.id == remoteId).typeId;
    // NOTE(review): ClassifyId is filled with the remote type id, exactly as before. Confirm
    // whether it should instead be mapped through the site's classify table.
    if (!int.TryParse(typeId, out int classifyId))
    {
        Log.Logger.Warning("{@name} 无法确定{movie:l}的分类,自动跳过", webSiteEntity.WebSiteName, name);
        return null;
    }

    if (!DateTime.TryParse(ChildText(video, "last", null), out DateTime lastUpdate))
    {
        Log.Logger.Warning("{@name} 跳过更新时间无效的条目:{movie:l}", webSiteEntity.WebSiteName, name);
        return null;
    }

    var year = ChildText(video, "year", "未知");
    if (year == "0")
    {
        year = "未知";
    }

    var movieEntity = new MovieEntity
    {
        Area = ChildText(video, "area", "未知"),
        Des = ChildText(video, "des", "暂无介绍"),
        Lang = ChildText(video, "lang", "未知"),
        Name = name,
        // Fixed: the poster URL used to be read from the "lang" element.
        Pic = ChildText(video, "pic", "/img/cover.jpg"),
        Year = year,
        ClassifyId = classifyId,
        DirectorId = GetOrCreateDirector(director).Id,
        LastUpdate = lastUpdate
    };
    movieEntity.Save();

    // Empty fragments (e.g. from ", ") and repeated names are dropped so that no blank actor
    // and no duplicate movie/actor link is stored.
    var actorNames = ChildText(video, "actor", "未知")
        .Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries)
        .Distinct();
    foreach (var actorName in actorNames)
    {
        new MovieActorEntity
        {
            ActorId = GetOrCreateActor(actorName).Id,
            MovieId = movieEntity.Id
        }.Save();
    }

    return movieEntity;
}

/// <summary>Looks a director up by name in the in-memory list, saving and caching a new row when absent.</summary>
private DirectorEntity GetOrCreateDirector(string directorName)
{
    // The list is now written to as well as read, so access is serialized on the private list.
    lock (_directorEntities)
    {
        var directorEntity = _directorEntities.FirstOrDefault(x => x.DirectorName == directorName);
        if (directorEntity == null)
        {
            directorEntity = new DirectorEntity { DirectorName = directorName };
            directorEntity.Save();
            // Remember the new row; otherwise the next movie by this director inserts it again.
            _directorEntities.Add(directorEntity);
        }

        return directorEntity;
    }
}

/// <summary>Looks an actor up by name in the in-memory list, saving and caching a new row when absent.</summary>
private ActorEntity GetOrCreateActor(string actorName)
{
    lock (_actorEntities)
    {
        var actorEntity = _actorEntities.FirstOrDefault(x => x.ActorName == actorName);
        if (actorEntity == null)
        {
            actorEntity = new ActorEntity { ActorName = actorName };
            actorEntity.Save();
            _actorEntities.Add(actorEntity);
        }

        return actorEntity;
    }
}

/// <summary>
/// Splits one "name$url[$type]" entry. When the third segment is absent the supplied flag is
/// used as the type. Returns false when no url or no type is available; the episode name is
/// always set.
/// </summary>
private static bool TryParseEpisode(string raw, string flag, out string episodeName, out string playUrl, out string type)
{
    var parts = raw.Split('$');
    episodeName = parts[0];
    playUrl = parts.Length > 1 ? parts[1] : null;
    type = parts.Length > 2 ? parts[2] : flag;
    return playUrl != null && type != null;
}

/// <summary>
/// Stores the episodes found in the movie's <c>dl/dd</c> nodes for this site: inserts every m3u8
/// list when the movie has no episodes for the site yet, otherwise appends the entries that
/// follow the most recently stored episode in the list with the same flag.
/// </summary>
private void SaveEpisodes(WebSiteEntity webSiteEntity, MovieEntity movieEntity, XmlNodeList dds)
{
    var movieListEntity = MovieListEntity.Where(x => x.MovieId == movieEntity.Id && x.WebSiteId == webSiteEntity.Id)
        .OrderByDescending(x => x.Id).First();

    if (movieListEntity == null)
    {
        foreach (XmlNode dd in dds)
        {
            var flag = dd.Attributes?["flag"]?.Value;
            var entries = dd.InnerText.Split('#');

            // Only lists whose first entry points at an m3u8 URL are imported.
            if (!TryParseEpisode(entries[0], flag, out _, out var firstUrl, out _)
                || !firstUrl.EndsWith("m3u8", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            Log.Logger.Information("{@name} 获取到m3u8列表!", webSiteEntity.WebSiteName);

            var movieListEntities = new List<MovieListEntity>();
            foreach (var entry in entries)
            {
                if (!TryParseEpisode(entry, flag, out var episodeName, out var playUrl, out var type))
                {
                    continue;
                }

                movieListEntities.Add(new MovieListEntity
                {
                    Name = episodeName,
                    Type = type,
                    MovieId = movieEntity.Id,
                    PlayUrl = playUrl,
                    WebSiteId = webSiteEntity.Id
                });
            }

            BaseEntity.Orm.Insert(movieListEntities).ExecuteAffrows();
            Log.Logger.Information("{@name} 插入剧集{movie:l}成功!", webSiteEntity.WebSiteName, movieEntity.Name);
        }

        return;
    }

    var node = dds.Cast<XmlNode>().FirstOrDefault(x => x.Attributes?["flag"]?.Value == movieListEntity.Type);
    if (node == null)
    {
        Log.Logger.Error("{@name} 未找到指定的剧集列表!", webSiteEntity.WebSiteName);
        // Fixed: execution used to fall through and dereference the null node.
        return;
    }

    // Entries up to and including the last stored episode are skipped; only what follows is new.
    var passedLastStored = false;
    foreach (var entry in node.InnerText.Split('#'))
    {
        var valid = TryParseEpisode(entry, movieListEntity.Type, out var episodeName, out var playUrl, out var type);
        if (episodeName == movieListEntity.Name)
        {
            passedLastStored = true;
            continue;
        }

        if (!passedLastStored || !valid)
        {
            continue;
        }

        new MovieListEntity
        {
            Name = episodeName,
            MovieId = movieEntity.Id,
            PlayUrl = playUrl,
            Type = type,
            WebSiteId = webSiteEntity.Id
        }.Save();
        Log.Logger.Information("{@name} 插入剧集{movie:l}-{episode:l}成功!", webSiteEntity.WebSiteName, movieEntity.Name, episodeName);
    }
}
|
|||
|
|
|||
|
/// <summary>
/// Crawls the site's listing pages starting at page 1 and hands the entries of each page to
/// <see cref="GetMovieInfos"/>. The crawl stops at the first entry older than the site's
/// <c>LatestMoveTime</c> or after the last page; on a completed crawl the newest timestamp
/// reported by <see cref="GetMovieInfos"/> is saved as the new <c>LatestMoveTime</c>.
/// </summary>
/// <param name="webSiteEntity">Site to crawl; its <c>LatestMoveTime</c> may be updated and saved.</param>
/// <param name="reGetAll">
/// Not read by this method. NOTE(review): presumably meant to force a full re-crawl — confirm
/// the intended behavior before wiring it up.
/// </param>
public void StartSpider(WebSiteEntity webSiteEntity, bool reGetAll = false)
{
    var model = GetMovieList(webSiteEntity, 1);
    if (model == null)
    {
        Log.Logger.Error("{@name} 获取信息失败,采集结束!", webSiteEntity.WebSiteName);
        return;
    }

    if (!int.TryParse(model.PageCount, out int count))
    {
        Log.Logger.Error("{@name} 获取总页数失败,采集结束!", webSiteEntity.WebSiteName);
        return;
    }

    // Newest timestamp seen over the whole crawl. Previously each page overwrote the value,
    // so the result of page 1 was lost as soon as page 2 had been processed.
    var latest = DateTime.MinValue;
    var number = 1;

    // The page-1 result fetched above is processed directly instead of being downloaded again.
    while (true)
    {
        var reachedKnownEntry = false;
        var idNameTypes = new List<(string id, string name, string typeId)>();

        foreach (var info in model.MovieInfoModels)
        {
            if (webSiteEntity.LatestMoveTime > info.LastUpdateTime)
            {
                Log.Logger.Information("{@name} 已检查到最后一条", webSiteEntity.WebSiteName);
                reachedKnownEntry = true;
                break;
            }

            if (!_webSiteClassifyEntities.Any(x => x.WebSiteId == webSiteEntity.Id && x.TypeId.ToString() == info.TypeId))
            {
                // Value passed as a template argument so braces in a type name are not parsed as placeholders.
                Log.Logger.Warning("{@name} 未找到与{type:l}对应的分类,自动跳过", webSiteEntity.WebSiteName, info.TypeName);
                continue;
            }

            idNameTypes.Add((info.Id, info.Name, info.TypeId));
        }

        // A page with nothing to import (for example only unmapped categories) no longer ends
        // the crawl; only an already-known entry or the last page does.
        if (idNameTypes.Count > 0)
        {
            var pageLatest = GetMovieInfos(webSiteEntity, idNameTypes);
            if (pageLatest > latest)
            {
                latest = pageLatest;
            }
        }

        number++;
        if (reachedKnownEntry || number > count)
        {
            break;
        }

        model = GetMovieList(webSiteEntity, number);
        if (model == null)
        {
            // Progress is deliberately not saved here: advancing LatestMoveTime after a partial
            // crawl would make the next run stop before the pages that were never reached.
            Log.Logger.Error("{@name} 获取信息失败,采集结束!", webSiteEntity.WebSiteName);
            return;
        }
    }

    // Saved on every completed crawl, including the one that ends by running out of pages.
    if (webSiteEntity.LatestMoveTime < latest)
    {
        webSiteEntity.LatestMoveTime = latest;
        webSiteEntity.Save();
    }

    Log.Information("{@name} 采集完成!", webSiteEntity.WebSiteName);
}
|
|||
|
}
|
|||
|
}
|