Skip to content

Commit

Permalink
Put crawlers into crawler package & rename them. Adjust paths in RE…
Browse files Browse the repository at this point in the history
…ADME
  • Loading branch information
andrei-punko committed Nov 4, 2024
1 parent b3e22ef commit e645375
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 37 deletions.
25 changes: 15 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,32 +1,37 @@

# Collection of Java-based web crawlers

[![Java CI with Maven](https://github.com/andrei-punko/java-crawlers/actions/workflows/maven.yml/badge.svg)](https://github.com/andrei-punko/java-crawlers/actions/workflows/maven.yml)

## Prerequisites

- Maven 3
- JDK 21

## How to build

```
mvn clean install
```

## Common crawler functionality
- Your crawler should extend [WebCrawler](crawler-engine/src/main/java/by/andd3dfx/crawler/engine/WebCrawler.java)
base crawler class
- DTO class which describes collected data should implement
[CrawlerData](crawler-engine/src/main/java/by/andd3dfx/crawler/dto/CrawlerData.java) marker interface

- Your crawler should extend [WebCrawler](crawler-engine/src/main/java/by/andd3dfx/crawler/engine/WebCrawler.java)
base crawler class
- DTO class which describes collected data should implement
[CrawlerData](crawler-engine/src/main/java/by/andd3dfx/crawler/dto/CrawlerData.java) marker interface

## Crawler for Orthodox torrent tracker [pravtor.ru](http://pravtor.ru)
Check [SearchUtil](pravtor.ru-crawler/src/main/java/by/andd3dfx/pravtor/util/SearchUtil.java)

Check [PravtorWebCrawler](pravtor.ru-crawler/src/main/java/by/andd3dfx/pravtor/crawler/PravtorWebCrawler.java)
in `pravtor.ru-crawler` module for details

To make search - run [run-search.bat](pravtor.ru-crawler/run-search.bat) script.
To run a search, use the [run-search.bat](pravtor.ru-crawler/run-search.bat) script.
Collected data will be placed into [result.xls](pravtor.ru-crawler/sandbox/result.xls) file in `sandbox` folder

## Crawler for vacancies aggregator [rabota.by / hh.ru](http://rabota.by)
Check [SearchUtil](rabota.by-crawler/src/main/java/by/andd3dfx/sitesparsing/rabotaby/SearchUtil.java)
## Crawler for the vacancies aggregator [rabota.by](http://rabota.by) (the localized version of [hh.ru](http://hh.ru) for Belarus)

Check [RabotabyWebCrawler](rabota.by-crawler/src/main/java/by/andd3dfx/rabotaby/crawler/RabotabyWebCrawler.java)
in `rabota.by-crawler` module for details

To make search - run `main()` method of [MainApp](rabota.by-crawler/src/main/java/by/andd3dfx/sitesparsing/rabotaby/MainApp.java)
To run a search, execute the `main()` method of the [MainApp](rabota.by-crawler/src/main/java/by/andd3dfx/rabotaby/MainApp.java)
class, passing the output file path as a command-line parameter
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
import by.andd3dfx.pravtor.dto.BatchSearchResult;
import by.andd3dfx.pravtor.dto.TorrentData;
import by.andd3dfx.pravtor.util.FileUtil;
import by.andd3dfx.pravtor.util.SearchUtil;
import by.andd3dfx.pravtor.crawler.PravtorWebCrawler;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;

public class MainApp {

private static final SearchUtil searchUtil = new SearchUtil();;
private static final PravtorWebCrawler crawler = new PravtorWebCrawler();;
private static final FileUtil fileUtil = new FileUtil();

public static void main(String[] args) throws IOException {
Expand All @@ -26,7 +26,7 @@ public static void main(String[] args) throws IOException {
String startingUrl = searchCriteria.url();
String label = searchCriteria.topic();

var result = searchUtil.batchSearch(startingUrl, -1, 20)
var result = crawler.batchSearch(startingUrl, -1, 20)
.stream()
.filter(torrentData -> torrentData.getDownloadedCount() != null)
.sorted(Comparator.comparingInt(TorrentData::getDownloadedCount).reversed())
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package by.andd3dfx.pravtor.util;
package by.andd3dfx.pravtor.crawler;

import by.andd3dfx.crawler.engine.WebCrawler;
import by.andd3dfx.pravtor.dto.TorrentData;
Expand All @@ -14,7 +14,7 @@
* Util to perform search on <a href="http://pravtor.ru">pravtor.ru</a> torrent tracker
*/
@Slf4j
public class SearchUtil extends WebCrawler<TorrentData> {
public class PravtorWebCrawler extends WebCrawler<TorrentData> {

private static final String BASE_URL = "https://pravtor.ru/";

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package by.andd3dfx.pravtor.util;
package by.andd3dfx.pravtor.crawler;

import by.andd3dfx.pravtor.dto.TorrentData;
import org.junit.Before;
Expand All @@ -9,27 +9,27 @@
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;

public class SearchUtilTest {
public class PravtorWebCrawlerTest {

private final String STARTING_URL = "https://pravtor.ru/viewforum.php?f=28"; // Святоотеческие тексты и жития святых

private SearchUtil searchUtil;
private PravtorWebCrawler crawler;

@Before
public void setup() {
searchUtil = new SearchUtil();
crawler = new PravtorWebCrawler();
}

@Test
public void batchSearch() {
List<TorrentData> result = searchUtil.batchSearch(STARTING_URL, 2, 20);
List<TorrentData> result = crawler.batchSearch(STARTING_URL, 2, 20);

assertThat("Wrong amount of result records", result.size(), is(100));
}

@Test
public void singleSearch() {
var result = searchUtil.singleSearch(STARTING_URL);
var result = crawler.singleSearch(STARTING_URL);

assertThat("Wrong amount of result records", result.dataItems().size(), is(50));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package by.andd3dfx.rabotaby;

import by.andd3dfx.rabotaby.util.SearchUtil;
import by.andd3dfx.rabotaby.crawler.RabotabyWebCrawler;
import by.andd3dfx.rabotaby.util.StatisticsUtil;

import java.io.IOException;
Expand All @@ -15,9 +15,9 @@ public static void main(String[] args) throws IOException {
throw new IllegalArgumentException("Path to output file should be populated!");
}

var searchUtil = new SearchUtil();
var pageUrl = searchUtil.buildStartingSearchUrl("java");
var searchResult = searchUtil.batchSearch(pageUrl);
var crawler = new RabotabyWebCrawler();
var pageUrl = crawler.buildStartingSearchUrl("java");
var searchResult = crawler.batchSearch(pageUrl);

var statisticsSortedMap = new StatisticsUtil().collectStatistics(searchResult);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package by.andd3dfx.rabotaby.util;
package by.andd3dfx.rabotaby.crawler;

import by.andd3dfx.crawler.engine.WebCrawler;
import by.andd3dfx.rabotaby.dto.VacancyData;
Expand All @@ -13,7 +13,7 @@
import java.util.stream.Collectors;

@Slf4j
public class SearchUtil extends WebCrawler<VacancyData> {
public class RabotabyWebCrawler extends WebCrawler<VacancyData> {

private final String BASE_URL = "http://rabota.by";

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package by.andd3dfx.rabotaby.util;
package by.andd3dfx.rabotaby.crawler;

import org.junit.Before;
import org.junit.Test;
Expand All @@ -7,22 +7,20 @@
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;

public class SearchUtilTest {
public class RabotabyWebCrawlerTest {

private static final int RECORDS_PER_PAGE = 20;
private SearchUtil searchUtil;
private StatisticsUtil statisticsUtil;
private RabotabyWebCrawler crawler;

@Before
public void setup() {
searchUtil = new SearchUtil();
statisticsUtil = new StatisticsUtil();
crawler = new RabotabyWebCrawler();
}

@Test
public void singleSearch() {
var pageUrl = searchUtil.buildStartingSearchUrl("java");
var result = searchUtil.singleSearch(pageUrl);
var pageUrl = crawler.buildStartingSearchUrl("java");
var result = crawler.singleSearch(pageUrl);

assertThat("Next url should be present", result.nextPageUrl(), is(
"http://rabota.by/search/vacancy?area=1002&text=java&page=1&hhtmFrom=vacancy_search_list"));
Expand All @@ -31,8 +29,8 @@ public void singleSearch() {

@Test
public void batchSearch() {
var pageUrl = searchUtil.buildStartingSearchUrl("java");
var searchResult = searchUtil.batchSearch(pageUrl, 2);
var pageUrl = crawler.buildStartingSearchUrl("java");
var searchResult = crawler.batchSearch(pageUrl, 2);

assertThat(searchResult.size(), is(2 * RECORDS_PER_PAGE));
}
Expand Down

0 comments on commit e645375

Please sign in to comment.