using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text.RegularExpressions;
using System.Windows.Forms;
class Program
{
// Pages to check. Main replaces this default list when a command-line argument is supplied.
static List<string> mTargetURLList =
new List<string>() {
"http://oraclesqlpuzzle.ninja-web.net/",
"http://oraclesqlpuzzle.ninja-web.net/csharp/",
"http://oraclesqlpuzzle.ninja-web.net/php/"
};
// Basic-authentication user name / password. The literal values are descriptive Japanese
// text ("Basic-auth user name" / "Basic-auth password").
// NOTE(review): the methods of this class declare same-named locals and do not read
// these constants - confirm whether they are still needed.
const string BasicUser = "Basic認証のユーザ名";
const string BasicPass = "Basic認証のパスワード";
// Link targets that were already processed. Static, so it is shared by every page
// checked during one run.
static HashSet<string> mCheckedURLSet = new HashSet<string>();
// User-Agent header value sent when the source of a page is downloaded.
const string UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko";
// Entry point. Decides which pages to check and runs Chech404 on each of them.
//
// With no argument the built-in list is used. When args[0] starts with "http://" or
// "https://" it becomes the only page; otherwise args[0] is read as a text file and
// every line that starts with one of those schemes becomes a page.
[STAThread]
static void Main(string[] args)
{
    Func<string, bool> looksLikeUrl =
        candidate => candidate.StartsWith("http://") || candidate.StartsWith("https://");

    // A command-line argument takes precedence over the built-in list.
    if (args.Length > 0) {
        string firstArg = args[0];
        if (looksLikeUrl(firstArg)) {
            // The argument itself is the URL to check.
            mTargetURLList.Clear();
            mTargetURLList.Add(firstArg);
        }
        else {
            // The argument names a text file; keep only the lines that are URLs.
            mTargetURLList = System.IO.File.ReadAllLines(firstArg)
                .Where(line => looksLikeUrl(line))
                .ToList();
        }
    }

    // Drop empty entries, then check the remaining pages one by one.
    mTargetURLList = mTargetURLList.Where(url => url != "").ToList();
    foreach (string url in mTargetURLList) {
        Chech404(url);
    }
}
// Checks the link-like attributes of one page for broken targets.
//
// The page pTargetURL is loaded in a NonDispBrowser. The href / src / data-src / action
// values of its <a>, <img>, <form>, <iframe>, <script> and <link> elements are turned
// into absolute URLs and requested through GetStatusCode. Every target whose status is
// neither 200 nor 401 is listed on the console, followed (when found) by the first line
// of the page source that contains the attribute. Values ending in "#" are listed for
// manual confirmation without being requested.
//
// pTargetURL: URL of the page to inspect; a trailing "/" is added unless it ends in
//             ".html" or "/".
static void Chech404(string pTargetURL)
{
    // Unless the URL ends in ".html" or "/", complete it with a trailing "/".
    if (pTargetURL.EndsWith(".html") == false && pTargetURL.EndsWith("/") == false) {
        pTargetURL += "/";
    }

    var InsNonDispBrowser = new NonDispBrowser();
    InsNonDispBrowser.NavigateAndWait(AddBasic(pTargetURL));
    HtmlDocument CurrDocument = InsNonDispBrowser.Document;

    List<string> ATagHrefList = DeriveValList(CurrDocument, "a", "href");
    List<string> ImgTagSrcList = DeriveValList(CurrDocument, "img", "src");
    List<string> ImgTagDataSrcList = DeriveValList(CurrDocument, "img", "data-src");
    List<string> FormTagActionList = DeriveValList(CurrDocument, "form", "action");
    List<string> IframeTagActionList = DeriveValList(CurrDocument, "iframe", "src");
    List<string> ScriptTagActionList = DeriveValList(CurrDocument, "script", "src");
    List<string> LinkTagActionList = DeriveValList(CurrDocument, "link", "href");

    WebRequest.DefaultWebProxy = null; // state explicitly that no proxy is used

    var ConfirmList = new List<string>();
    // Source lines of the page; downloaded lazily, only once a problem has to be located.
    string[] TargetURLHtmlLineDataArr = null;

    Action<List<string>, string, string> wkAct = (pList, pElementName, pMatchPattern) =>
    {
        foreach (string EachURL in pList) {
            if (string.IsNullOrEmpty(EachURL)) continue;
            if (EachURL.StartsWith("javascript:")) continue; // JavaScript pseudo protocol
            if (EachURL.StartsWith("mailto:")) continue; // mailto protocol
            if (EachURL.StartsWith("tel:")) continue; // tel protocol
            if (EachURL.StartsWith("data:")) continue; // inline data, nothing to request
            if (EachURL.StartsWith("about:blank")) continue; // about:blank
            if (EachURL == "#" && pElementName == "form") continue; // form whose action is "#"

            string CheckURL = ToAbsoluteURL(pTargetURL, EachURL);

            // Dedupe on the resolved URL: the same relative value on a page in another
            // directory points at a different resource and still has to be checked.
            if (mCheckedURLSet.Add(CheckURL) == false) continue;

            // Ends in "#": cannot be verified by a request, so list it for confirmation.
            if (CheckURL.EndsWith("#")) {
                ConfirmList.Add(string.Format(@"""#""なので確認 {0}", EachURL));
                continue;
            }

            // Last path segment has no extension and no trailing "/": treat it as a
            // directory. "?" and "#" are excluded so that a query string or fragment is
            // not damaged by the appended "/".
            if (Regex.IsMatch(CheckURL, @"/[^/.?#]+$")) {
                CheckURL += "/";
            }

            int StatusCode = GetStatusCode(CheckURL);
            if (StatusCode == 200 || StatusCode == 401) continue;
            if (CheckURL.Contains(@"https://fonts.googleapis.com")) continue;
            if (CheckURL.Contains(@"https://fonts.gstatic.com")) continue;

            string ConfirmStr = string.Format("Status={0} URL={1}", StatusCode, CheckURL);

            if (TargetURLHtmlLineDataArr == null) {
                TargetURLHtmlLineDataArr = GetTargetURLHtmlLineDataArr(pTargetURL);
            }

            // The source may contain the attribute as written (EachURL) or in resolved
            // form (CheckURL). Both are escaped so URL characters such as "." "?" "+"
            // are matched literally instead of acting as regex operators.
            string LinePattern = string.Format(pMatchPattern,
                "(?:" + Regex.Escape(EachURL) + "|" + Regex.Escape(CheckURL) + ")");

            for (int I = 0; I < TargetURLHtmlLineDataArr.Length; I++) {
                if (Regex.IsMatch(TargetURLHtmlLineDataArr[I], LinePattern) == false) continue;
                // Strip leading tabs and spaces before showing the line.
                string LTrimStr = Regex.Replace(TargetURLHtmlLineDataArr[I], @"^(\t| )+", "");
                ConfirmStr += Environment.NewLine;
                ConfirmStr += string.Format("{0}行目の{1}", I + 1, LTrimStr);
                break; // only the first matching line is reported
            }
            ConfirmList.Add(ConfirmStr);
        }
    };

    wkAct(ATagHrefList, "a", @"<a.*href *= *""{0}""");
    wkAct(ImgTagSrcList, "img", @"<img.*src *= *""{0}""");
    wkAct(ImgTagDataSrcList, "img", @"<img.*data-src *= *""{0}""");
    wkAct(FormTagActionList, "form", @"<form.*action *= *""{0}""");
    wkAct(IframeTagActionList, "iframe", @"<iframe.*src *= *""{0}""");
    wkAct(ScriptTagActionList, "script", @"<script.*src *= *""{0}""");
    wkAct(LinkTagActionList, "link", @"<link.*href *= *""{0}""");

    Console.WriteLine("■■■ ConfirmList Of {0} ■■■", pTargetURL);
    for (int I = 0; I <= ConfirmList.Count - 1; I++) {
        Console.WriteLine("{0:D2}番目 {1}", I + 1, ConfirmList[I]);
    }
}

// Converts an attribute value found on the page pPageURL into an absolute URL.
static string ToAbsoluteURL(string pPageURL, string pLinkURL)
{
    // Already absolute.
    if (pLinkURL.StartsWith("http://") || pLinkURL.StartsWith("https://")) {
        return pLinkURL;
    }
    // Network-path reference ("//host/path"): reuse the scheme of the page.
    if (pLinkURL.StartsWith("//")) {
        if (pPageURL.StartsWith("https://")) return "https:" + pLinkURL;
        if (pPageURL.StartsWith("http://")) return "http:" + pLinkURL;
    }
    // Document-root-relative path ("/path"): resolve against scheme + host,
    // not against the directory of the page.
    if (pLinkURL.StartsWith("/")) {
        Match SiteRootMatch = Regex.Match(pPageURL, @"^https?://[^/]+");
        if (SiteRootMatch.Success) return SiteRootMatch.Value + pLinkURL;
    }
    // Relative path: resolve against the directory of the page. TrimStart only matters
    // for the fallback case of a "/..." value on a page that is not http(s).
    string CurrDirPath = Regex.Replace(pPageURL, "[^/]+$", "");
    return CurrDirPath + pLinkURL.TrimStart('/');
}
// Returns pTargetURL with "user:pass@" inserted directly after "http://" or "https://".
//
// The URL is returned unchanged when it already contains that "user:pass@" text or when
// it does not start with one of the two schemes.
// NOTE(review): _Z01_DeriveUserPassFromDomain is defined outside this file; presumably it
// selects the credentials from the domain of the URL - confirm.
static string AddBasic(string pTargetURL)
{
    var Ins_Z01_DeriveUserPassFromDomain = new _Z01_DeriveUserPassFromDomain(pTargetURL);
    string BasicUser = Ins_Z01_DeriveUserPassFromDomain.GetUser();
    string BasicPass = Ins_Z01_DeriveUserPassFromDomain.GetPass();
    string AddStr = string.Format(@"{0}:{1}@", BasicUser, BasicPass);

    // Credentials are already part of the URL.
    if (pTargetURL.Contains(AddStr)) {
        return pTargetURL;
    }

    Match SchemeMatch = Regex.Match(pTargetURL, @"^https?://");
    if (SchemeMatch.Success == false) {
        return pTargetURL;
    }

    // Insert as plain text. Passing AddStr to Regex.Replace as the replacement would
    // interpret "$" sequences in the credentials as regex substitutions.
    return pTargetURL.Insert(SchemeMatch.Length, AddStr);
}
// Collects the values of one attribute from every element with the given tag name.
//
// pDoc:           document to search.
// pElementName:   tag name passed to GetElementsByTagName (e.g. "a").
// pAttributeName: attribute whose value is read from each element (e.g. "href").
// Returns the distinct attribute values in the order of their first occurrence.
static List<string> DeriveValList(HtmlDocument pDoc, string pElementName, string pAttributeName)
{
    var WillReturn = new List<string>();
    // Tracks the values already emitted so that the duplicate check does not have to
    // scan the result list for every element.
    var SeenValSet = new HashSet<string>();
    foreach (HtmlElement EachElement in pDoc.GetElementsByTagName(pElementName)) {
        string AttrVal = EachElement.GetAttribute(pAttributeName);
        if (SeenValSet.Add(AttrVal)) {
            WillReturn.Add(AttrVal);
        }
    }
    return WillReturn;
}
// Requests pURL and returns the HTTP status code of the response.
//
// Error responses that still carry an HTTP response (e.g. 404, 500) return that status
// code. When no response is available (for example the server cannot be reached) or any
// other exception occurs, the failure is written to the console and 999 is returned.
// NOTE(review): _Z01_DeriveUserPassFromDomain is defined outside this file; presumably it
// selects the credentials from the domain of the URL - confirm.
public static int GetStatusCode(string pURL)
{
    HttpWebResponse InsWebResponse = null;
    try {
        HttpWebRequest InsWebRequest = (HttpWebRequest)WebRequest.Create(pURL);

        // Authentication settings
        var Ins_Z01_DeriveUserPassFromDomain = new _Z01_DeriveUserPassFromDomain(pURL);
        string BasicUser = Ins_Z01_DeriveUserPassFromDomain.GetUser();
        string BasicPass = Ins_Z01_DeriveUserPassFromDomain.GetPass();
        InsWebRequest.Credentials = new System.Net.NetworkCredential(BasicUser, BasicPass);

        try {
            InsWebResponse = (HttpWebResponse)InsWebRequest.GetResponse();
        }
        catch (WebException ex) {
            // 4xx / 5xx arrive as WebException; the response still holds the status code.
            InsWebResponse = (HttpWebResponse)ex.Response;
            if (InsWebResponse == null) {
                throw; // no response at all (e.g. server unreachable): handled below
            }
        }
        return (int)InsWebResponse.StatusCode;
    }
    catch (Exception ex) {
        Console.WriteLine("【{0}】へのアクセスで例外発生", pURL);
        // The message says what went wrong; the stack trace alone only says where.
        Console.WriteLine(ex.Message);
        Console.WriteLine(ex.StackTrace);
        return 999;
    }
    finally {
        if (InsWebResponse != null) {
            InsWebResponse.Close();
        }
    }
}
// Downloads the page pTargetURL and returns its source as an array with one entry per line.
//
// The request carries the UserAgent constant and a Basic Authorization header built from
// the credentials returned by _Z01_DeriveUserPassFromDomain. When an exception occurs its
// message is written to the console and the lines read so far (possibly none) are returned.
// NOTE(review): the credentials are encoded as ASCII, which replaces non-ASCII characters
// with "?" - confirm that credentials are ASCII-only.
static string[] GetTargetURLHtmlLineDataArr(string pTargetURL)
{
    var WillReturn = new List<string>();
    try {
        // using blocks release the client, stream and reader even when the download fails.
        using (var wc = new System.Net.WebClient()) {
            wc.Headers.Add("user-agent", UserAgent);

            var Ins_Z01_DeriveUserPassFromDomain = new _Z01_DeriveUserPassFromDomain(pTargetURL);
            string BasicUser = Ins_Z01_DeriveUserPassFromDomain.GetUser();
            string BasicPass = Ins_Z01_DeriveUserPassFromDomain.GetPass();
            var namePassword = string.Format("{0}:{1}", BasicUser, BasicPass);
            var chars = System.Text.Encoding.ASCII.GetBytes(namePassword);
            var base64 = Convert.ToBase64String(chars);
            wc.Headers[System.Net.HttpRequestHeader.Authorization] = "Basic " + base64;

            using (System.IO.Stream st = wc.OpenRead(pTargetURL))
            using (var sr = new System.IO.StreamReader(st)) {
                string Line;
                while ((Line = sr.ReadLine()) != null) {
                    WillReturn.Add(Line);
                }
            }
        }
    }
    catch (Exception e) {
        Console.WriteLine(e.Message);
    }
    return WillReturn.ToArray();
}
}