Skip to content

Commit

Permalink
#23 テストの変更・追加
Browse files Browse the repository at this point in the history
  • Loading branch information
TakenPt committed Apr 20, 2024
1 parent 88e7290 commit 456ff7d
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 62 deletions.
55 changes: 50 additions & 5 deletions Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@ public partial class ScrapingAozoraService(ISplitBraceService splitBraceService,
private readonly ISplitBraceService _splitBraceService = splitBraceService;
private readonly IScrapingClientService _scrapingClientService = scrapingClientService;

private EpubDocument? _document;


public bool IsMatchSite(Uri uri)
{
return uri.Host == "www.aozora.gr.jp";
Expand Down Expand Up @@ -455,17 +452,65 @@ private static string GetCardUrl(string url)
private SplittedLineBuilder ParagraphLineBuilder = new SplittedLineBuilder();
private SplittedLineBuilder ScriptLineLineBuilder = new SplittedLineBuilder();


private int HeadingId = 0;

Check warning on line 455 in Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs

View workflow job for this annotation

GitHub Actions / build

The field 'ScrapingAozoraService.HeadingId' is assigned but its value is never used

Check warning on line 455 in Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs

View workflow job for this annotation

GitHub Actions / build

The field 'ScrapingAozoraService.HeadingId' is assigned but its value is never used

Check warning on line 455 in Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs

View workflow job for this annotation

GitHub Actions / test

The field 'ScrapingAozoraService.HeadingId' is assigned but its value is never used
private Dictionary<string, (int min, int max)> Classes = new Dictionary<string, (int, int)>();

/// <summary>
/// ある要素のChildrenに応じた処理を行います。
/// </summary>
/// <param name="document">追加処理を行う対象となるEpubDocument</param>
/// <param name="element">処理を行う要素</param>
internal void ProcessChildren(IElement element)
/// <param name="classes">適用される class のリスト</param>
internal void ProcessChildren(EpubDocument document, IElement element, string classes)
{

}

/// <summary>
/// Generates the CSS classes used inside the EpubDocument, based on <see cref="Classes"/>.
/// </summary>
/// <remarks>
/// For each of the keys "jisage", "text_indent" and "chitsuki" that is present in
/// <see cref="Classes"/>, one CSS rule is added for every integer from the recorded
/// <c>min</c> to the recorded <c>max</c>, inclusive. Keys that are absent are skipped.
/// NOTE(review): every rule of a family is registered under the same first constructor
/// argument (e.g. "jisage"), not "jisage_{i}" — confirm this matches what CssClass expects.
/// </remarks>
/// <param name="document">The EpubDocument whose <see cref="CssClass"/> entries are modified.</param>
void AddCssClasses(EpubDocument document)
{
    // The CSS bodies below are verbatim strings: their line breaks and column-0 layout
    // are part of the emitted value, so they are intentionally not indented with the code.

    // Left margin for indented blocks.
    if (Classes.TryGetValue("jisage", out var jisage))
    {
        for (var i = jisage.min; i <= jisage.max; i++)
        {
            document.CssClasses.Add(new CssClass("jisage", $@"
.jisage_{i} {{
margin-left: {i}em;
}}
"));
        }
    }

    // First-line indent; the test data in this commit uses a negative min for this key.
    if (Classes.TryGetValue("text_indent", out var textIndent))
    {
        for (var i = textIndent.min; i <= textIndent.max; i++)
        {
            document.CssClasses.Add(new CssClass("text_indent", $@"
.text_indent_{i} {{
text-indent: {i}em;
}}
"));
        }
    }

    // Right-aligned text with a right margin.
    if (Classes.TryGetValue("chitsuki", out var chitsuki))
    {
        for (var i = chitsuki.min; i <= chitsuki.max; i++)
        {
            document.CssClasses.Add(new CssClass("chitsuki", $@"
.chitsuki_{i} {{
text-align: right;
margin-right: {i}em;
}}
"));
        }
    }
}


[System.Text.RegularExpressions.GeneratedRegex(@"(https://www\.aozora\.gr\.jp/cards/\d{6}/)files/(\d{1,})_\d{1,}(\.html)")]
private static partial System.Text.RegularExpressions.Regex UrlBookToCard();
Expand Down
144 changes: 87 additions & 57 deletions KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,91 +11,121 @@ public class ScrapingAozoraServiceTest
{
private static readonly EpubDocument EmptySingleParagraph = new EpubDocument("", "", "", Guid.NewGuid()) { Chapters = [new Chapter() { Sections = [new Section("") { Elements = [new Paragraph()] }] }] };

public static object[][] ProcessChildrenTestCases()
/// <summary>
/// (htmlの要素の)テキストを"<div class = \"main_text\"></div>"で囲む
/// </summary>
/// <param name="text">divタグで囲むhtmlの要素</param>
/// <returns>divタグで囲まれた<paramref name="text"/></returns>
private static string ToMainText(string text)
{
// string: 読み込むhtml。これをclass = "main_text"なdivタグで囲ってテストに投げる
// EpubDocument: ProcessChildren実行前のScrapingAozoraService._document。
// CssClass[]: ProcessChildren実行前のScrapingAozoraService._document.CssClassesに追加したいCssClassを列挙する。
// EpubDocument: ProcessChildren実行後にあるべき、ScrapingAozoraService._document。
// CssClass[]: ProcessChildren実行後にあるべきScrapingAozoraService._document.CssClassesに追加したいCssClassを列挙する。
return @$"<div class = ""main_text"">{text}</div>";
}

(string, EpubDocument, CssClass[], EpubDocument, CssClass[])[] patterns = [
public static object[][] ProcessChildrenlayout1TestCases()
{
(string, Paragraph)[] cases = [
// レイアウト1.1 改丁
(@"<span class=""notes"">[#改丁]</span><br>", EmptySingleParagraph, [], new EpubDocument("", "", "", Guid.NewGuid()) { Chapters = [new Chapter() { Sections = [new Section("") { Elements = [new Paragraph() { Text = "[#改丁]", ScriptLine = new ScriptLine("", "", "") }] }] }] }, []),
(@"<span class=""notes"">[#改丁]</span><br>", new Paragraph() { Text = "[#改丁]", ScriptLine = new ScriptLine("", "", "") }),
// レイアウト1.2 改ページ
(@"<span class=""notes"">[#改ページ]</span><br>", EmptySingleParagraph, [], new EpubDocument("", "", "", Guid.NewGuid()) { Chapters = [new Chapter() { Sections = [new Section("") { Elements = [new Paragraph() { Text = "[#改ページ]", ScriptLine = new ScriptLine("", "", "") }] }] }] }, []),
(@"<span class=""notes"">[#改ページ]</span><br>", new Paragraph() { Text = "[#改ページ]", ScriptLine = new ScriptLine("", "", "") }),
// レイアウト1.3 改見開き
(@"<span class=""notes"">[#改見開き]</span><br>", EmptySingleParagraph, [], new EpubDocument("", "", "", Guid.NewGuid()) { Chapters = [new Chapter() { Sections = [new Section("") { Elements = [new Paragraph() { Text = "[#改見開き]", ScriptLine = new ScriptLine("", "", "") }] }] }] }, []),
(@"<span class=""notes"">[#改見開き]</span><br>", new Paragraph() { Text = "[#改見開き]", ScriptLine = new ScriptLine("", "", "") }),
// レイアウト1.4 改段
(@"<span class=""notes"">[#改段]</span><br />", EmptySingleParagraph, [], new EpubDocument("", "", "", Guid.NewGuid()) { Chapters = [new Chapter() { Sections = [new Section("") { Elements = [new Paragraph() { Text = "[#改段]", ScriptLine = new ScriptLine("", "", "") }] }] }] }, []),
(@"<span class=""notes"">[#改段]</span><br />", new Paragraph() { Text = "[#改段]", ScriptLine = new ScriptLine("", "", "") }),
];
return cases.Select(c => new object[] { ToMainText(c.Item1), c.Item2 }).ToArray();
}

for (int i = 0; i < patterns.Length; i++)
[Theory]
[MemberData(nameof(ProcessChildrenlayout1TestCases))]
public async void ProcessChildrenlayout1Test(string html, Paragraph expected)
{
var config = Configuration.Default.WithDefaultLoader();
using var context = BrowsingContext.New(config);
var doc = await context.OpenAsync(request => request.Content(html));
var mainText = doc.QuerySelector(".main_text");
if (mainText == null)
Assert.Fail();
var scraper = new ScrapingAozoraService(new SplitBraceService(), new ScrapingClientService(new httpClientFactory(), TimeProvider.System));
var document = EmptySingleParagraph;

scraper.ProcessChildren(document, mainText, "");

Assert.Single(document.Chapters);
Assert.Single(document.Chapters[^1].Sections);
Assert.Single(document.Chapters[^1].Sections);
Assert.IsType<Paragraph>(document.Chapters[^1].Sections[^1].Elements[^1]);
if (document.Chapters[^1].Sections[^1].Elements[^1] is Paragraph paragraph)
{
patterns[i].Item2.CssClasses.AddRange(patterns[i].Item3);
patterns[i].Item4.CssClasses.AddRange(patterns[i].Item5);
Assert.Equal(expected.Text, paragraph.Text);
Assert.Equal(expected.ClassName, paragraph.ClassName);
Assert.NotNull(paragraph.ScriptLine);
Assert.Equal(expected.ScriptLine?.Text, paragraph.ScriptLine.Text);
}
return patterns.Select(c => new object[] { ToMainText(c.Item1), c.Item2, c.Item4 }).ToArray();
}

/// <summary>
/// (htmlの要素の)テキストを"<div class = \"main_text\"></div>"で囲む
/// </summary>
/// <param name="text">divタグで囲むhtmlの要素</param>
/// <returns>divタグで囲まれた<paramref name="text"/></returns>
private static string ToMainText(string text)
// Classes の各 value は、対応するclass で、ソースに出てきたものの内、最大のものの値を保持するようにする。
public static object[][] ProcessChildrenlayout2TestCases()
{
return @$"<div class = ""main_text"">{text}</div>";
(string, Paragraph[], (string, (int, int))[])[] cases = [
// レイアウト2.1 1行だけの字下げ
(@"<div class=""jisage_3"" style=""margin-left: 3em"">text<br /></div><br>", [new Paragraph() { Text = "text", ClassName = "jisage_3", ScriptLine = new ScriptLine("text", "", "") }], [("jisage", (1, 3))]),
// レイアウト2.2 ブロックでの字下げ
(@"<div class=""jisage_3"" style=""margin-left: 3em"">text1<br />text2<br /></div><br>", [new Paragraph() { Text = "text1", ClassName = "jisage_3", ScriptLine = new ScriptLine("text1", "", "") }, new Paragraph() { Text = "text2", ClassName = "jisage_3", ScriptLine = new ScriptLine("text2", "", "") },], [("jisage", (1, 3))]),
// レイアウト2.3 凹凸の複雑な字下げ
(@"<div class=""burasage"" style=""margin-left: 3em; text_indent: -1em;"">Long Text</div>", [new Paragraph() { Text = "Long Text", ClassName = "jisage_3 text_indent_-1" }], [("jisage", (1, 3)), ("text_indent", (-1, 0))]),
// レイアウト2.4 は特定の書き方について述べていないので省略。
// レイアウト2.5 地付き
(@"<div class=""chitsuki_0"" style=""text-align:right; margin-right: 0em"">text</div>", [new Paragraph() { Text = "text", ClassName = "chitsuki_0", ScriptLine = new ScriptLine("text", "", "") }], [("chitsuki", (0, 0))]),


// </div>の後の<br />がないパターン
(@"<div class=""jisage_3"" style=""margin-left: 3em"">text<br /></div>", [new Paragraph() { Text = "text", ClassName = "jisage_3", ScriptLine = new ScriptLine("text", "", "") }], [("jisage", (1, 3))]),
// </div>の前の<br />がないパターン
(@"<div class=""burasage"" style=""margin-left: 1em; text_indent: -1em;"">text</div>", [new Paragraph() { Text = "text", ClassName = "jisage_3 text_indent_-1", ScriptLine = new ScriptLine("text", "", "") }], [("jisage", (1, 3)), ("text_indent", (-1, 0))]),

];
return cases.Select(c => new object[] { ToMainText(c.Item1), c.Item2, c.Item3 }).ToArray();
}

[Theory]
[MemberData(nameof(ProcessChildrenTestCases))]
public async void ProcessChildrenTest(string html, EpubDocument initial, EpubDocument expected)
[MemberData(nameof(ProcessChildrenlayout2TestCases))]
public async void ProcessChildrenlayout2Test(string html, IReadOnlyCollection<Paragraph> expectedParagraphs, IEnumerable<(string, (int min, int max))> expectedDictionary)
{
var config = Configuration.Default.WithDefaultLoader();
using var context = BrowsingContext.New(config);
var doc = await context.OpenAsync(request => request.Content(html));
var mainText = doc.QuerySelector(".main_text");
if (mainText == null)
Assert.Fail();
var scraper = new ScrapingAozoraService(new SplitBraceService(), new ScrapingClientService(new httpClientFactory(), TimeProvider.System));
scraper._document() = initial;
var document = EmptySingleParagraph;

scraper.ProcessChildren(mainText!);
scraper.ProcessChildren(document, mainText, "");

var actual = scraper._document();
Assert.Equal(expected.Title, actual.Title);
Assert.Equal(expected.Author, actual.Author);
Assert.Equal(expected.CssClasses, actual.CssClasses);
foreach ((var expectedChapter, var actualChapter) in expected.Chapters.Zip(actual.Chapters))
Assert.Single(document.Chapters);
Assert.Single(document.Chapters[^1].Sections);
Assert.Equal(expectedParagraphs.Count, document.Chapters[^1].Sections[^1].Elements.Count);
foreach ((var expectedParagraph, var actualElement) in expectedParagraphs.Zip(document.Chapters[^1].Sections[^1].Elements))
{
Assert.Equal(expectedChapter.Title, actualChapter.Title);
foreach ((var expectedSection, var actualSection) in expectedChapter.Sections.Zip(actualChapter.Sections))
Assert.IsType<Paragraph>(actualElement);
if (actualElement is Paragraph actualParagraph)
{
Assert.Equal(expectedSection.Title, actualSection.Title);
foreach ((var expectedElement, var actualElement) in expectedSection.Elements.Zip(actualSection.Elements))
{
switch (expectedElement, actualElement)
{
case (Paragraph expectedParagraph, Paragraph actualParagraph):
Assert.Equal(expectedParagraph.ClassName, actualParagraph.ClassName);
Assert.Equal(expectedParagraph.Text, actualParagraph.Text);
Assert.NotNull(expectedParagraph.ScriptLine);
Assert.NotNull(actualParagraph.ScriptLine);
Assert.Equal(expectedParagraph.ScriptLine.Text, actualParagraph.ScriptLine.Text);
break;
case (Picture expectedPicture, Picture actualPicture):
Assert.Equal(expectedPicture.ClassName, actualPicture.ClassName);
Assert.Equal(expectedPicture.PictureFilePath, actualPicture.PictureFilePath);
break;
default:
Assert.Fail();
break;
}
}
Assert.Equal(expectedParagraph.Text, actualParagraph.Text);
Assert.Equal(expectedParagraph.ClassName, actualParagraph.ClassName);
Assert.NotNull(actualParagraph.ScriptLine);
Assert.Equal(expectedParagraph.ScriptLine?.Text, actualParagraph.ScriptLine.Text);
}
// ScrapingAozoraService.Classes の確認
foreach ((var key, var exceptedValue) in expectedDictionary)
{
Assert.True(scraper._Classes().ContainsKey(key));
Assert.True(scraper._Classes()[key].min <= exceptedValue.min);
Assert.True(scraper._Classes()[key].max >= exceptedValue.max);
}
}
}


internal class httpClientFactory : IHttpClientFactory
{
public HttpClient CreateClient(string name)
Expand Down Expand Up @@ -160,6 +190,6 @@ file static class ScrapingAozora
[UnsafeAccessor(UnsafeAccessorKind.StaticMethod)]
public static extern (List<int> contentsIds, bool hasChapter, bool hasSection) LoadToc(ScrapingAozoraService? _, IDocument doc, EpubDocument epubDocument);

[UnsafeAccessor(UnsafeAccessorKind.Field)]
public static extern ref EpubDocument _document(this ScrapingAozoraService scraper);
[UnsafeAccessor(UnsafeAccessorKind.Field, Name = "Classes")]
public static extern Dictionary<string, (int min, int max)> _Classes(this ScrapingAozoraService scraper);
}

0 comments on commit 456ff7d

Please sign in to comment.