From 363f7986fbaf3e5393e4d4f9da13f1bc180939e9 Mon Sep 17 00:00:00 2001 From: soeunkk Date: Sun, 15 May 2022 02:12:09 +0900 Subject: [PATCH] =?UTF-8?q?[feat]=20=EC=9D=98=EC=95=BD=ED=92=88=20?= =?UTF-8?q?=EC=A0=95=EB=B3=B4=20xlsx=20=EC=97=85=EB=8D=B0=EC=9D=B4?= =?UTF-8?q?=ED=8A=B8=20=EC=9C=A0=ED=8B=B8=EB=A6=AC=ED=8B=B0=20=EA=B5=AC?= =?UTF-8?q?=ED=98=84=20(#8)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 2 + build.gradle | 3 + .../pillaroid/component/JsoupComponent.java | 4 +- .../component/MedicineExcelUtils.java | 108 ++++++++++++++++++ .../pillaroid/service/MedicineService.java | 2 +- 5 files changed, 116 insertions(+), 3 deletions(-) create mode 100644 src/main/java/com/nadoyagsa/pillaroid/component/MedicineExcelUtils.java diff --git a/.gitignore b/.gitignore index 58418dc..775c697 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,8 @@ out/ !**/src/main/**/out/ !**/src/test/**/out/ src/main/resources/application.properties +src/main/resources/**/medicine.xlsx +src/main/resources/**/medicine_result.xlsx src/test/java/com/nadoyagsa/pillaroid/service ### NetBeans ### diff --git a/build.gradle b/build.gradle index 6f062ef..109c928 100644 --- a/build.gradle +++ b/build.gradle @@ -28,6 +28,9 @@ dependencies { implementation 'org.jsoup:jsoup:1.14.3' + implementation 'org.apache.poi:poi:5.2.2' + implementation 'org.apache.poi:poi-ooxml:5.2.2' + implementation 'io.jsonwebtoken:jjwt-api:0.11.5' runtimeOnly 'io.jsonwebtoken:jjwt-impl:0.11.5' runtimeOnly 'io.jsonwebtoken:jjwt-jackson:0.11.5' diff --git a/src/main/java/com/nadoyagsa/pillaroid/component/JsoupComponent.java b/src/main/java/com/nadoyagsa/pillaroid/component/JsoupComponent.java index faebf00..18e44d4 100644 --- a/src/main/java/com/nadoyagsa/pillaroid/component/JsoupComponent.java +++ b/src/main/java/com/nadoyagsa/pillaroid/component/JsoupComponent.java @@ -13,8 +13,8 @@ @Component public class JsoupComponent { - public Medicine getMedicineInfo() { - final String medicineUrl = ""; + public Medicine getMedicineInfo(String url) { + final String medicineUrl = url; Connection conn = Jsoup.connect(medicineUrl); try { diff --git a/src/main/java/com/nadoyagsa/pillaroid/component/MedicineExcelUtils.java b/src/main/java/com/nadoyagsa/pillaroid/component/MedicineExcelUtils.java new file mode 100644 index 0000000..f9fda77 --- /dev/null +++ b/src/main/java/com/nadoyagsa/pillaroid/component/MedicineExcelUtils.java @@ -0,0 +1,108 @@ +package com.nadoyagsa.pillaroid.component; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; + +import org.apache.poi.xssf.usermodel.XSSFCell; +import org.apache.poi.xssf.usermodel.XSSFRow; +import org.apache.poi.xssf.usermodel.XSSFSheet; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.springframework.core.io.ClassPathResource; +import org.springframework.stereotype.Component; + +import com.nadoyagsa.pillaroid.dto.Medicine; + +import lombok.RequiredArgsConstructor; + +@Component +@RequiredArgsConstructor +public class MedicineExcelUtils { + private final JsoupComponent jsoupComponent; + + public void updateMedicineExcel() throws IOException { + + ClassPathResource inputResource = new ClassPathResource("data/medicine.xlsx"); + + FileInputStream file = new FileInputStream(new File(inputResource.getURI())); + XSSFWorkbook workbook = new XSSFWorkbook(file); + + XSSFSheet sheet = workbook.getSheetAt(0); + int rows = sheet.getPhysicalNumberOfRows(); + int titleColIdx = 0; //의약품명이 적힌 열의 Idx + + for (int rowIdx = 1; rowIdx result = crawlMedicineInfo(productLink); + + //크롤링한 값을 셀에 저장 + result.forEach((colIdx, content) -> { + XSSFCell editCell = row.getCell(colIdx); + editCell.setCellValue(content); + }); + } else { + //값이 없으면 공백 저장 + for (int colIdx = 4; colIdx <= 6; colIdx++) { //4: 효능효과, 5: 용법용량, 6: 주의사항 + XSSFCell editCell = row.getCell(colIdx); + editCell.setCellValue(""); + } + } + } + } + + //파일에 저장 + File currDir = new File("."); + String path = currDir.getAbsolutePath(); + String fileLocation = path.substring(0, path.length() - 1) + "/src/main/resources/data/medicine_result.xlsx"; + FileOutputStream outStream = new FileOutputStream(fileLocation); + workbook.write(outStream); + workbook.close(); + } + + private String crawlProductLink(String itemName) throws IOException { + String encodingItemName = URLEncoder.encode(itemName, StandardCharsets.UTF_8); + String wikipediaUrl = "https://terms.naver.com/medicineSearch.naver?mode=nameSearch&query="+ encodingItemName; + Document wikipediaDoc = Jsoup.connect(wikipediaUrl).get(); + + Element firstElement = wikipediaDoc.selectFirst("ul[class=\"content_list\"]"); + if (firstElement == null) return null; //검색결과가 없으면 null 반환 + + Element firstTitleElement = firstElement + .selectFirst("li") + .selectFirst("div[class=\"info_area\"]") + .getElementsByClass("title").first() + .selectFirst("a"); + return firstTitleElement.attr("href"); + } + + private HashMap crawlMedicineInfo(String productLink) { + String detailBaseUrl = "https://terms.naver.com"; + String detailUrl = detailBaseUrl + productLink; + + Medicine medicineInfo = jsoupComponent.getMedicineInfo(detailUrl); + + //(수정할 colIdx, content)로 된 hashMap + HashMap result = new HashMap<>(); + result.put(4, medicineInfo.getEfficacy()); + result.put(5, medicineInfo.getUsage()); + result.put(6, medicineInfo.getPrecautions()); + return result; + } +} diff --git a/src/main/java/com/nadoyagsa/pillaroid/service/MedicineService.java b/src/main/java/com/nadoyagsa/pillaroid/service/MedicineService.java index d1b88fe..412dde9 100644 --- a/src/main/java/com/nadoyagsa/pillaroid/service/MedicineService.java +++ b/src/main/java/com/nadoyagsa/pillaroid/service/MedicineService.java @@ -15,6 +15,6 @@ public MedicineService(JsoupComponent jsoupComponent) { } public Medicine getMedicineInfo() { - return jsoupComponent.getMedicineInfo(); + return jsoupComponent.getMedicineInfo(""); } }