265 lines
13 KiB
C#
265 lines
13 KiB
C#
using System;
|
||
using System.Collections.Generic;
|
||
using System.Linq;
|
||
using System.Net;
|
||
using System.Xml;
|
||
using FreeSql;
|
||
using JXCMS.CMS.Movie.Entity;
|
||
using JXCMS.CMS.Movie.Models;
|
||
using JXCMS.Core.Extensions;
|
||
using JXCMS.Core.Log;
|
||
using SufeiUtil;
|
||
using Log = Serilog.Log;
|
||
|
||
namespace JXCMS.CMS.Movie.Spider
|
||
{
|
||
public class MovieSpider
|
||
{
|
||
|
||
// Lookup tables filled once by the constructor and consulted while crawling.
private readonly List<WebSiteClassifyEntity> _webSiteClassifyEntities;
private readonly List<ActorEntity> _actorEntities;
private readonly List<DirectorEntity> _directorEntities;

// Backing field for the lazily created shared instance.
private static MovieSpider _instance;

/// <summary>
/// Shared <see cref="MovieSpider"/> instance, created on first access.
/// </summary>
/// <remarks>
/// The creation check is not synchronized.
/// </remarks>
public static MovieSpider Instance
{
    get
    {
        if (_instance == null)
        {
            _instance = new MovieSpider();
        }

        return _instance;
    }
}

/// <summary>
/// Fills the classification, actor and director lists from each entity's
/// <c>Select.ToList()</c> query. Private so instances are only obtained through
/// <see cref="Instance"/>.
/// </summary>
private MovieSpider()
{
    var classifySnapshot = WebSiteClassifyEntity.Select.ToList();
    _webSiteClassifyEntities = classifySnapshot;

    var actorSnapshot = ActorEntity.Select.ToList();
    _actorEntities = actorSnapshot;

    var directorSnapshot = DirectorEntity.Select.ToList();
    _directorEntities = directorSnapshot;
}
|
||
|
||
/// <summary>
/// Requests one page of the site's listing API (<c>{ApiUrl}?pg={number}</c>) and parses the
/// XML response into a <see cref="MovieListInfoModel"/>.
/// </summary>
/// <param name="webSiteEntity">
/// Site to query; its <c>ApiUrl</c> builds the request and its <c>WebSiteName</c> tags log entries.
/// </param>
/// <param name="number">Page number sent as the <c>pg</c> query parameter.</param>
/// <returns>
/// The paging attributes (<c>page</c>, <c>pagecount</c>, <c>pagesize</c>, <c>recordcount</c>) and
/// the video summaries of the page, or <c>null</c> when the response status is not 200 or the
/// document has no <c>/rss/list</c> element. A paging attribute that is absent is returned as
/// <c>null</c>.
/// </returns>
/// <exception cref="XmlException">The response body is not well-formed XML.</exception>
public MovieListInfoModel GetMovieList(WebSiteEntity webSiteEntity, int number)
{
    var item = new HttpItem { URL = $"{webSiteEntity.ApiUrl}?pg={number}" };
    var result = new HttpHelper().GetHtml(item);
    if (result.StatusCode != HttpStatusCode.OK)
    {
        Log.Warning("{@name} 获取api失败,错误码{statusCode}", webSiteEntity.WebSiteName, result.StatusCode);
        return null;
    }

    // Values are passed as template properties instead of being interpolated into the
    // template, so braces inside a URL cannot be misread as property tokens. ":l" keeps the
    // rendered text free of quotes.
    Log.Logger.Information("{@name} 成功获取地址:{url:l}", webSiteEntity.WebSiteName, item.URL);

    var doc = new XmlDocument();
    doc.LoadXml(result.Html);

    var listNode = doc.SelectSingleNode("/rss/list");
    if (listNode == null)
    {
        // Without the list element there is nothing to read; report it like a failed request.
        Log.Warning("{@name} 响应中未找到列表节点", webSiteEntity.WebSiteName);
        return null;
    }

    // Null-tolerant accessors: a missing attribute yields null, a missing child yields "".
    string Attr(string attrName) => listNode.Attributes?[attrName]?.Value;
    string Text(XmlNode parent, string child) => parent.SelectSingleNode(child)?.InnerText ?? string.Empty;

    var movieListInfoModel = new MovieListInfoModel
    {
        CurrentPage = Attr("page"),
        PageCount = Attr("pagecount"),
        PageSize = Attr("pagesize"),
        RecordCount = Attr("recordcount")
    };

    // ".//video" limits the search to descendants of the list element; the previous
    // "//video" searched the whole document regardless of the context node.
    foreach (XmlNode video in listNode.SelectNodes(".//video"))
    {
        var movieName = Text(video, "name");
        if (!DateTime.TryParse(Text(video, "last"), out var lastUpdateTime))
        {
            // One malformed entry should not abort the whole page.
            Log.Warning("{@name} 影片{movie:l}的更新时间无法解析,自动跳过", webSiteEntity.WebSiteName, movieName);
            continue;
        }

        movieListInfoModel.MovieInfoModels.Add(new MovieInfoModel
        {
            LastUpdateTime = lastUpdateTime,
            Name = movieName,
            TypeId = Text(video, "tid"),
            Id = Text(video, "id"),
            TypeName = Text(video, "type")
        });
    }

    return movieListInfoModel;
}
|
||
|
||
/// <summary>
/// Fetches the detail records for the given video ids
/// (<c>{ApiUrl}?ac=videolist&amp;ids=...</c>) and stores them. A movie that is not yet in the
/// database (matched by name and director name) is created together with its director and
/// actor links. Then the play list for this site is either inserted in full (no entries
/// stored yet) or extended with the episodes that follow the last stored one.
/// </summary>
/// <param name="webSiteEntity">
/// Site being crawled; supplies <c>ApiUrl</c>, <c>Id</c> (stored as <c>WebSiteId</c>) and
/// <c>WebSiteName</c> (used in log entries).
/// </param>
/// <param name="idNameTypes">
/// Requested videos. The ids are joined into the <c>ids</c> query parameter; the
/// <c>typeId</c> of the matching tuple is parsed as the new movie's <c>ClassifyId</c>.
/// </param>
/// <returns>
/// The greatest <c>LastUpdate</c> among the movies created by this call, or
/// <see cref="DateTime.MinValue"/> when none was created or the response status is not 200.
/// Movies that already existed do not contribute to the result.
/// </returns>
/// <exception cref="XmlException">The response body is not well-formed XML.</exception>
public DateTime GetMovieInfos(WebSiteEntity webSiteEntity, List<(string id, string name, string typeId)> idNameTypes)
{
    var item = new HttpItem
    {
        URL = $"{webSiteEntity.ApiUrl}?ac=videolist&ids={string.Join(",", idNameTypes.Select(x => x.id))}"
    };
    var result = new HttpHelper().GetHtml(item);
    if (result.StatusCode != HttpStatusCode.OK)
    {
        // Same warning as GetMovieList so a failed detail request is visible in the log.
        Log.Warning("{@name} 获取api失败,错误码{statusCode}", webSiteEntity.WebSiteName, result.StatusCode);
        return DateTime.MinValue;
    }

    // Values are passed as template properties rather than interpolated into the template;
    // ":l" keeps the rendered text free of quotes.
    Log.Logger.Information("{@name} 成功获取地址:{url:l}", webSiteEntity.WebSiteName, item.URL);

    var doc = new XmlDocument();
    doc.LoadXml(result.Html);

    DateTime dt = DateTime.MinValue;
    foreach (XmlNode video in doc.SelectNodes("/rss/list/video"))
    {
        var name = ChildText(video, "name");
        var director = ChildText(video, "director");
        if (director.IsNullOrEmpty())
        {
            director = "未知";
        }

        var movieEntity = MovieEntity.Select.Include(x => x.DirectorEntity)
            .Where(x => x.Name == name && x.DirectorEntity.DirectorName == director).First();
        if (movieEntity == null)
        {
            movieEntity = CreateMovie(video, name, director, idNameTypes, webSiteEntity);
            if (movieEntity == null)
            {
                // The node could not be turned into a movie; the reason has been logged.
                continue;
            }

            if (movieEntity.LastUpdate > dt)
            {
                dt = movieEntity.LastUpdate;
            }
        }

        var dds = video.SelectNodes("dl/dd");
        // Highest-id play-list row for this movie/site, i.e. the most recently stored episode.
        var movieListEntity = MovieListEntity.Where(x => x.MovieId == movieEntity.Id && x.WebSiteId == webSiteEntity.Id)
            .OrderByDescending(x => x.Id).First();
        if (movieListEntity == null)
        {
            InsertPlayLists(dds, movieEntity, webSiteEntity);
        }
        else
        {
            AppendNewEpisodes(dds, movieListEntity, movieEntity, webSiteEntity);
        }
    }

    return dt;
}

// Returns the inner text of the named child element, or "" when the child is absent.
private static string ChildText(XmlNode parent, string childName)
{
    return parent.SelectSingleNode(childName)?.InnerText ?? string.Empty;
}

// Builds and saves a MovieEntity (with director and actor links) from a <video> node;
// returns null, after logging, when the node lacks a usable type id or update time.
private MovieEntity CreateMovie(XmlNode video, string name, string director,
    List<(string id, string name, string typeId)> idNameTypes, WebSiteEntity webSiteEntity)
{
    // Validate everything that can fail before the first Save so a skipped video leaves
    // no partial rows behind.
    var videoId = ChildText(video, "id");
    var requested = idNameTypes.FirstOrDefault(x => x.id == videoId);
    if (requested.id == null || !int.TryParse(requested.typeId, out var classifyId))
    {
        Log.Logger.Warning("{@name} 无法确定影片{movie:l}的分类,自动跳过", webSiteEntity.WebSiteName, name);
        return null;
    }

    if (!DateTime.TryParse(ChildText(video, "last"), out var lastUpdate))
    {
        Log.Logger.Warning("{@name} 影片{movie:l}的更新时间无法解析,自动跳过", webSiteEntity.WebSiteName, name);
        return null;
    }

    string TextOrDefault(string child, string fallback)
    {
        var text = ChildText(video, child);
        return text == "" ? fallback : text;
    }

    var year = ChildText(video, "year");
    var movieEntity = new MovieEntity
    {
        Area = TextOrDefault("area", "未知"),
        Des = TextOrDefault("des", "暂无介绍"),
        Lang = TextOrDefault("lang", "未知"),
        Name = name,
        // Fixed: the poster used to be read from the "lang" node instead of "pic".
        Pic = TextOrDefault("pic", "/img/cover.jpg"),
        Year = year == "" || year == "0" ? "未知" : year,
        // NOTE(review): this stores the remote type id as ClassifyId, as before; confirm
        // that no mapping through WebSiteClassifyEntity is expected here.
        ClassifyId = classifyId,
        LastUpdate = lastUpdate
    };

    var directorEntity = _directorEntities.FirstOrDefault(x => x.DirectorName == director);
    if (directorEntity == null)
    {
        directorEntity = new DirectorEntity { DirectorName = director };
        directorEntity.Save();
        // Keep the cache in step with the database; otherwise every later video by the
        // same director would insert another director row.
        _directorEntities.Add(directorEntity);
    }

    movieEntity.DirectorId = directorEntity.Id;
    movieEntity.Save();

    LinkActors(video, movieEntity);
    return movieEntity;
}

// Creates missing actors and one MovieActorEntity link per distinct actor name of the video.
private void LinkActors(XmlNode video, MovieEntity movieEntity)
{
    var actorStr = ChildText(video, "actor");
    if (actorStr.IsNullOrEmpty())
    {
        actorStr = "未知";
    }

    // Drop empty fragments (e.g. from ", ") and repeated names so no blank actor and no
    // duplicate link is stored.
    var actorNames = actorStr
        .Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries)
        .Distinct()
        .ToList();
    if (actorNames.Count == 0)
    {
        actorNames.Add("未知");
    }

    foreach (var actorName in actorNames)
    {
        var actorEntity = _actorEntities.FirstOrDefault(x => x.ActorName == actorName);
        if (actorEntity == null)
        {
            actorEntity = new ActorEntity { ActorName = actorName };
            actorEntity.Save();
            // Same reasoning as for directors: avoid re-inserting the actor later in the run.
            _actorEntities.Add(actorEntity);
        }

        new MovieActorEntity
        {
            ActorId = actorEntity.Id,
            MovieId = movieEntity.Id
        }.Save();
    }
}

// Inserts the complete episode list of every <dd> whose first entry is an m3u8 URL.
private static void InsertPlayLists(XmlNodeList dds, MovieEntity movieEntity, WebSiteEntity webSiteEntity)
{
    foreach (XmlNode dd in dds)
    {
        // Entries are separated by '#', and the fields of an entry (name, url, type) by '$'.
        var entries = dd.InnerText.Split('#').Select(x => x.Split('$')).ToList();
        var first = entries[0];
        if (first.Length < 2 || !first[1].EndsWith("m3u8", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        Log.Logger.Information("{@name} 获取到m3u8列表!", webSiteEntity.WebSiteName);

        // Entries without all three fields (such as the empty one after a trailing '#')
        // are ignored instead of raising IndexOutOfRangeException.
        var movieListEntities = entries
            .Where(sp => sp.Length >= 3)
            .Select(sp => new MovieListEntity
            {
                Name = sp[0],
                Type = sp[2],
                MovieId = movieEntity.Id,
                PlayUrl = sp[1],
                WebSiteId = webSiteEntity.Id
            })
            .ToList();
        if (movieListEntities.Count == 0)
        {
            continue;
        }

        BaseEntity.Orm.Insert(movieListEntities).ExecuteAffrows();
        Log.Logger.Information("{@name} 插入剧集{movie:l}成功!", webSiteEntity.WebSiteName, movieEntity.Name);
    }
}

// Saves the episodes listed after the last stored one in the <dd> whose flag equals its Type.
private static void AppendNewEpisodes(XmlNodeList dds, MovieListEntity lastStored, MovieEntity movieEntity,
    WebSiteEntity webSiteEntity)
{
    var node = dds.Cast<XmlNode>().FirstOrDefault(x => x.Attributes?["flag"]?.Value == lastStored.Type);
    if (node == null)
    {
        Log.Logger.Error("{@name} 未找到指定的剧集列表!", webSiteEntity.WebSiteName);
        // Fixed: execution used to continue and dereference the null node.
        return;
    }

    // Becomes true once the last stored episode has been passed; only later entries are new.
    bool passedLastStored = false;
    foreach (var entry in node.InnerText.Split('#'))
    {
        var sp = entry.Split('$');
        if (sp[0] == lastStored.Name)
        {
            passedLastStored = true;
            continue;
        }

        if (!passedLastStored || sp.Length < 3)
        {
            continue;
        }

        new MovieListEntity
        {
            Name = sp[0],
            MovieId = movieEntity.Id,
            PlayUrl = sp[1],
            Type = sp[2],
            WebSiteId = webSiteEntity.Id
        }.Save();
        Log.Logger.Information("{@name} 插入剧集{movie:l}-{episode:l}成功!", webSiteEntity.WebSiteName,
            movieEntity.Name, sp[0]);
    }
}
|
||
|
||
/// <summary>
/// Crawls the site's listing pages from page 1 upward. Entries whose type has a matching
/// row in the cached site classifications are handed to <see cref="GetMovieInfos"/>. When
/// the crawl ends, the site's <c>LatestMoveTime</c> is raised to the newest update time
/// reported for the processed pages.
/// </summary>
/// <remarks>
/// The crawl ends when an entry older than <c>LatestMoveTime</c> is met, when a page has no
/// entries, or after the last page (<c>PageCount</c>). If a page cannot be fetched the crawl
/// stops without touching <c>LatestMoveTime</c>, so the unvisited pages are examined again on
/// the next run.
/// </remarks>
/// <param name="webSiteEntity">Site to crawl; saved when its <c>LatestMoveTime</c> advances.</param>
/// <param name="reGetAll">Not read by this method; kept so existing callers compile unchanged.</param>
public void StartSpider(WebSiteEntity webSiteEntity, bool reGetAll = false)
{
    var model = GetMovieList(webSiteEntity, 1);
    if (model == null)
    {
        Log.Logger.Error("{@name} 获取信息失败,采集结束!", webSiteEntity.WebSiteName);
        return;
    }

    if (!int.TryParse(model.PageCount, out int count))
    {
        Log.Logger.Error("{@name} 获取总页数失败,采集结束!", webSiteEntity.WebSiteName);
        return;
    }

    DateTime dt = DateTime.MinValue;
    bool finished = false;
    int number = 1;
    do
    {
        // Page 1 was already downloaded to read the page count; reuse it instead of
        // requesting it a second time.
        if (number > 1)
        {
            model = GetMovieList(webSiteEntity, number);
            if (model == null)
            {
                Log.Logger.Error("{@name} 获取信息失败,采集结束!", webSiteEntity.WebSiteName);
                return;
            }
        }

        number++;

        var idNameTypes = new List<(string id, string name, string typeId)>();
        int entriesSeen = 0;
        foreach (var modelMovieInfoModel in model.MovieInfoModels)
        {
            entriesSeen++;
            if (webSiteEntity.LatestMoveTime > modelMovieInfoModel.LastUpdateTime)
            {
                Log.Logger.Information("{@name} 已检查到最后一条", webSiteEntity.WebSiteName);
                // Everything from here on was handled by an earlier run.
                finished = true;
                break;
            }

            if (!_webSiteClassifyEntities.Any(x => x.WebSiteId == webSiteEntity.Id && x.TypeId.ToString() == modelMovieInfoModel.TypeId))
            {
                Log.Logger.Warning("{@name} 未找到与{type:l}对应的分类,自动跳过", webSiteEntity.WebSiteName,
                    modelMovieInfoModel.TypeName);
                continue;
            }

            idNameTypes.Add((modelMovieInfoModel.Id, modelMovieInfoModel.Name, modelMovieInfoModel.TypeId));
        }

        if (entriesSeen == 0)
        {
            // An empty page means there is nothing further to read.
            finished = true;
        }

        // A page whose entries were all skipped for lack of a classification no longer
        // ends the crawl; only the conditions above do.
        if (idNameTypes.Count > 0)
        {
            var pageLatest = GetMovieInfos(webSiteEntity, idNameTypes);
            // Keep the maximum over all pages; overwriting per page let an older page
            // (or a failed request returning MinValue) pull the value back down.
            if (pageLatest > dt)
            {
                dt = pageLatest;
            }
        }
    } while (!finished && number <= count);

    // Persist the watermark on every normal end, including running past the last page.
    if (webSiteEntity.LatestMoveTime < dt)
    {
        webSiteEntity.LatestMoveTime = dt;
        webSiteEntity.Save();
    }

    Log.Information("{@name} 采集完成!", webSiteEntity.WebSiteName);
}
|
||
}
|
||
} |