// Source path: JXMovies/CMS/JXCMS.CMS.Movie/Spider/MovieSpider.cs
// Repository listing: 265 lines, 13 KiB, C#; last change 2020-02-16 22:11:24 +08:00.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Xml;
using FreeSql;
using JXCMS.CMS.Movie.Entity;
using JXCMS.CMS.Movie.Models;
using JXCMS.Core.Extensions;
using JXCMS.Core.Log;
using SufeiUtil;
using Log = Serilog.Log;
namespace JXCMS.CMS.Movie.Spider
{
public class MovieSpider
{
private readonly List<WebSiteClassifyEntity> _webSiteClassifyEntities;
private readonly List<ActorEntity> _actorEntities;
private readonly List<DirectorEntity> _directorEntities;
private static MovieSpider _instance;
public static MovieSpider Instance => _instance ??= new MovieSpider();
private MovieSpider()
{
_webSiteClassifyEntities = WebSiteClassifyEntity.Select.ToList();
_actorEntities = ActorEntity.Select.ToList();
_directorEntities = DirectorEntity.Select.ToList();
}
public MovieListInfoModel GetMovieList(WebSiteEntity webSiteEntity, int number)
{
HttpItem item = new HttpItem();
item.URL = $"{webSiteEntity.ApiUrl}?pg={number}";
HttpHelper helper = new HttpHelper();
var result = helper.GetHtml(item);
if (result.StatusCode == HttpStatusCode.OK)
{
Log.Logger.Information($"{{@name}} 成功获取地址:{item.URL}", webSiteEntity.WebSiteName);
XmlDocument doc = new XmlDocument();
doc.LoadXml(result.Html);
var listNode = doc.SelectSingleNode("/rss/list");
MovieListInfoModel movieListInfoModel = new MovieListInfoModel();;
movieListInfoModel.CurrentPage = listNode.Attributes["page"].Value;
movieListInfoModel.PageCount = listNode.Attributes["pagecount"].Value;
movieListInfoModel.PageSize = listNode.Attributes["pagesize"].Value;
movieListInfoModel.RecordCount = listNode.Attributes["recordcount"].Value;
var videos = listNode.SelectNodes("//video");
foreach (XmlNode video in videos)
{
MovieInfoModel model = new MovieInfoModel
{
LastUpdateTime = DateTime.Parse(video.SelectSingleNode("last").InnerText),
Name = video.SelectSingleNode("name").InnerText,
TypeId = video.SelectSingleNode("tid").InnerText,
Id = video.SelectSingleNode("id").InnerText,
TypeName = video.SelectSingleNode("type").InnerText
};
movieListInfoModel.MovieInfoModels.Add(model);
}
return movieListInfoModel;
}
Log.Warning($"{{@name}} 获取api失败错误码{result.StatusCode}", webSiteEntity.WebSiteName);
return null;
}
public DateTime GetMovieInfos(WebSiteEntity webSiteEntity, List<(string id, string name, string typeId)> idNameTypes)
{
HttpItem item = new HttpItem();
item.URL = $"{webSiteEntity.ApiUrl}?ac=videolist&ids={string.Join(",", idNameTypes.Select(x => x.id))}";
HttpHelper helper = new HttpHelper();
var result = helper.GetHtml(item);
if (result.StatusCode == HttpStatusCode.OK)
{
DateTime dt = DateTime.MinValue;
Log.Logger.Information($"{{@name}} 成功获取地址:{item.URL}", webSiteEntity.WebSiteName);
XmlDocument doc = new XmlDocument();
doc.LoadXml(result.Html);
var videoNode = doc.SelectNodes("/rss/list/video");
foreach (XmlNode video in videoNode)
{
var name = video.SelectSingleNode("name").InnerText;
var director = video.SelectSingleNode("director").InnerText;
if (director.IsNullOrEmpty())
{
director = "未知";
}
var movieEntity = MovieEntity.Select.Include(x => x.DirectorEntity)
.Where(x => x.Name == name && x.DirectorEntity.DirectorName == director).First();
if (movieEntity == null)
{
movieEntity = new MovieEntity();
movieEntity.Area = video.SelectSingleNode("area").InnerText == "" ? "未知":video.SelectSingleNode("area").InnerText;
movieEntity.Des = video.SelectSingleNode("des").InnerText == "" ? "暂无介绍":video.SelectSingleNode("des").InnerText;
movieEntity.Lang = video.SelectSingleNode("lang").InnerText == "" ? "未知":video.SelectSingleNode("lang").InnerText;
movieEntity.Name = video.SelectSingleNode("name").InnerText;
movieEntity.Pic = video.SelectSingleNode("pic").InnerText == "" ? "/img/cover.jpg":video.SelectSingleNode("lang").InnerText;
movieEntity.Year = video.SelectSingleNode("year").InnerText == "" || video.SelectSingleNode("year").InnerText == "0" ? "未知":video.SelectSingleNode("year").InnerText;
movieEntity.ClassifyId = int.Parse(idNameTypes.First(x => x.id == video.SelectSingleNode("id").InnerText).typeId);
var directorId = _directorEntities.FirstOrDefault(x => x.DirectorName == director)?.Id;
if (directorId == null)
{
DirectorEntity directorEntity = new DirectorEntity();
directorEntity.DirectorName = director;
directorEntity.Save();
directorId = directorEntity.Id;
}
movieEntity.DirectorId = directorId.Value;
movieEntity.LastUpdate = DateTime.Parse(video.SelectSingleNode("last").InnerText);
movieEntity.Save();
if (movieEntity.LastUpdate > dt)
{
dt = movieEntity.LastUpdate;
}
var actorStr = video.SelectSingleNode("actor").InnerText;
if (actorStr.IsNullOrEmpty())
{
actorStr = "未知";
}
var actorsStr = actorStr.Split(',', ' ');
var actors = _actorEntities.Where(x => actorsStr.Contains(x.ActorName)).ToList();
foreach (var actor in actorsStr)
{
var id = actors.FirstOrDefault(x => x.ActorName == actor)?.Id;
if (id == null)
{
ActorEntity actorEntity = new ActorEntity {ActorName = actor};
actorEntity.Save();
id = actorEntity.Id;
}
MovieActorEntity movieActorEntity = new MovieActorEntity
{
ActorId = id.Value, MovieId = movieEntity.Id
};
movieActorEntity.Save();
}
}
var dds = video.SelectNodes("dl/dd");
var movieListEntity = MovieListEntity.Where(x => x.MovieId == movieEntity.Id && x.WebSiteId == webSiteEntity.Id)
.OrderByDescending(x => x.Id).First();
if (movieListEntity == null)
{
foreach (XmlNode dd in dds)
{
var number = dd.InnerText.Split('#');
if (number.Length <= 0 || !number[0].Split('$')[1].ToLower().EndsWith("m3u8"))
{
continue;
}
Log.Logger.Information("{@name} 获取到m3u8列表", webSiteEntity.WebSiteName);
var movieListEntities = number.Select(x =>
{
var sp = x.Split('$');
return new MovieListEntity
{
Name = sp[0],
Type = sp[2],
MovieId = movieEntity.Id,
PlayUrl = sp[1],
WebSiteId = webSiteEntity.Id
};
});
BaseEntity.Orm.Insert(movieListEntities).ExecuteAffrows();
Log.Logger.Information($"{{@name}} 插入剧集{movieEntity.Name}成功!", webSiteEntity.WebSiteName);
}
}
else
{
var node = dds.Cast<XmlNode>().FirstOrDefault(x => x.Attributes["flag"].Value == movieListEntity.Type);
if (node == null)
{
Log.Logger.Error("{@name} 未找到指定的剧集列表!", webSiteEntity.WebSiteName);
}
bool flag = false;
foreach (var number in node.InnerText.Split('#'))
{
var sp = number.Split('$');
if (sp[0] == movieListEntity.Name)
{
flag = true;
continue;
}
if (!flag)
{
continue;
}
new MovieListEntity()
{
Name = sp[0],
MovieId = movieEntity.Id,
PlayUrl = sp[1],
Type = sp[2],
WebSiteId = webSiteEntity.Id
}.Save();
Log.Logger.Information($"{{@name}} 插入剧集{movieEntity.Name}-{sp[0]}成功!", webSiteEntity.WebSiteName);
}
}
}
return dt;
}
return DateTime.MinValue;
}
public void StartSpider(WebSiteEntity webSiteEntity, bool reGetAll = false)
{
var model = GetMovieList(webSiteEntity, 1);
if (model == null)
{
Log.Logger.Error("{@name} 获取信息失败,采集结束!", webSiteEntity.WebSiteName);
return;
}
int number = 1;
if (!int.TryParse(model.PageCount, out int count))
{
Log.Logger.Error("{@name} 获取总页数失败,采集结束!", webSiteEntity.WebSiteName);
return;
}
DateTime dt = DateTime.MinValue;
do
{
model = GetMovieList(webSiteEntity, number++);
if (model == null)
{
Log.Logger.Error("{@name} 获取信息失败,采集结束!", webSiteEntity.WebSiteName);
return;
}
List<(string id, string name, string typeId)> idNameTypes = new List<(string, string, string)>();
foreach (var modelMovieInfoModel in model.MovieInfoModels)
{
if (webSiteEntity.LatestMoveTime > modelMovieInfoModel.LastUpdateTime)
{
Log.Logger.Information("{@name} 已检查到最后一条", webSiteEntity.WebSiteName);
break;
}
if (!_webSiteClassifyEntities.Any(x => x.WebSiteId == webSiteEntity.Id && x.TypeId.ToString() == modelMovieInfoModel.TypeId))
{
Log.Logger.Warning($"{{@name}} 未找到与{modelMovieInfoModel.TypeName}对应的分类,自动跳过", webSiteEntity.WebSiteName);
continue;
}
idNameTypes.Add((modelMovieInfoModel.Id, modelMovieInfoModel.Name, modelMovieInfoModel.TypeId));
}
if (idNameTypes.Count == 0)
{
if (webSiteEntity.LatestMoveTime < dt)
{
webSiteEntity.LatestMoveTime = dt;
webSiteEntity.Save();
}
Log.Information("{@name} 采集完成!", webSiteEntity.WebSiteName);
return;
}
dt = GetMovieInfos(webSiteEntity, idNameTypes);
} while (number <= count);
}
}
}