Skip to content

Commit

Permalink
[feat] 의약품 세부 정보 조회 크롤링 (#8)
Browse files Browse the repository at this point in the history
  • Loading branch information
youngniw committed May 14, 2022
1 parent e04f13a commit 1d70ff5
Show file tree
Hide file tree
Showing 6 changed files with 182 additions and 3 deletions.
8 changes: 5 additions & 3 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@ dependencies {
annotationProcessor 'org.projectlombok:lombok'
runtimeOnly 'mysql:mysql-connector-java'

implementation 'io.jsonwebtoken:jjwt-api:0.11.2'
runtimeOnly 'io.jsonwebtoken:jjwt-impl:0.11.2'
runtimeOnly 'io.jsonwebtoken:jjwt-jackson:0.11.2'
implementation 'org.jsoup:jsoup:1.14.3'

implementation 'io.jsonwebtoken:jjwt-api:0.11.5'
runtimeOnly 'io.jsonwebtoken:jjwt-impl:0.11.5'
runtimeOnly 'io.jsonwebtoken:jjwt-jackson:0.11.5'

testImplementation 'org.springframework.boot:spring-boot-starter-test'
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package com.nadoyagsa.pillaroid.component;

import com.nadoyagsa.pillaroid.dto.Appearance;
import com.nadoyagsa.pillaroid.dto.Medicine;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;

import java.io.IOException;

/**
 * Crawls a medicine detail page with jsoup and maps its sections onto a {@link Medicine}.
 *
 * <p>Sections are located through elements carrying the CSS class {@code stress}; the text of
 * such an element is the section title and its next sibling element holds the section body.
 */
@Component
public class JsoupComponent {
    // Fully-qualified on purpose so that no additional import is required for this class.
    private static final java.util.logging.Logger LOGGER =
            java.util.logging.Logger.getLogger(JsoupComponent.class.getName());

    /**
     * Downloads the medicine page and extracts its information.
     *
     * @return the parsed medicine, or {@code null} when no URL is configured or the page
     *         could not be fetched
     */
    public Medicine getMedicineInfo() {
        final String medicineUrl = "";

        // Jsoup.connect rejects an empty URL with an unchecked exception; treat a missing URL
        // like any other "could not fetch" case and return null, as the I/O failure path does.
        if (medicineUrl.isEmpty()) {
            LOGGER.warning("Medicine URL is not configured; skipping crawl");
            return null;
        }

        try {
            Document document = Jsoup.connect(medicineUrl).get();

            Elements nameElements = document.getElementsByClass("stress");
            if (!nameElements.isEmpty()) {
                Element parent = nameElements.get(0).parent();
                if (parent != null) {
                    // The crawled Naver medicine page may contain empty <p></p> pairs that cut
                    // a section body in two, so they are removed before the sections are read.
                    String parentProcessed = parent.html()
                            .replace("</p>\n<p></p>", "")
                            .replace("<p></p>", "");

                    return getMedicineInfo(Jsoup.parse(parentProcessed));
                }
            }
            return getMedicineInfo(document);
        } catch (IOException e) {
            LOGGER.log(java.util.logging.Level.WARNING,
                    "Failed to fetch medicine page: " + medicineUrl, e);
            return null;
        }
    }

    /**
     * Extracts the medicine sections (appearance, ingredients, storage, efficacy, usage,
     * precautions) from an already parsed document.
     *
     * @param document parsed page, or page fragment, containing the {@code stress} title elements
     * @return a medicine holding every section that was found; sections that are absent from
     *         the document stay unset
     */
    public Medicine getMedicineInfo(Document document) {
        Medicine medicine = new Medicine();

        Elements nameElements = document.getElementsByClass("stress");
        for (Element nameElement : nameElements) {
            String topic = nameElement.text();
            Element textElement = nameElement.nextElementSibling();
            if (textElement == null) {
                continue; // a title without a following body element carries no data
            }

            // NOTE(review): HTML entities (e.g. &amp;) are left as-is by this tag stripping.
            String text = textElement.html()
                    .replace("<br>", "\n")        // keep every line break
                    .replaceAll("<[^>]*>", "");   // drop all remaining tags

            switch (topic) {
                case "외형정보":
                    medicine.setAppearanceInfo(parseAppearance(text));
                    break;
                case "성분정보":
                    medicine.setIngredient(text);
                    break;
                case "저장방법":
                    medicine.setSave(text);
                    break;
                case "효능효과":
                    medicine.setEfficacy(text);
                    break;
                case "용법용량":
                    medicine.setUsage(text);
                    break;
                case "사용상 주의사항":
                    medicine.setPrecautions(text);
                    break;
                default:
                    break; // unrelated section title
            }
        }
        return medicine;
    }

    /**
     * Parses the appearance section, a list of {@code label : value} items separated by
     * {@code ·}.
     *
     * @param text tag-free body text of the appearance section
     * @return the appearance with every recognised item filled in
     */
    private Appearance parseAppearance(String text) {
        Appearance appearanceInfo = new Appearance();

        for (String subTopic : text.split("·")) {
            // Limit 2 keeps colons that belong to the value and keeps a trailing empty value,
            // so "label :" yields an empty string instead of a one-element array.
            String[] information = subTopic.split(":", 2);
            if (information.length < 2) {
                continue; // no "label : value" pair in this fragment
            }

            String label = information[0];
            String value = information[1].trim();

            if (label.contains("성상")) {
                appearanceInfo.setAppearance(value);
            } else if (label.contains("제형")) {
                appearanceInfo.setFormulation(value);
            } else if (label.contains("모양")) {
                appearanceInfo.setShape(value);
            } else if (label.contains("색상")) {
                appearanceInfo.setColor(value);
            } else if (label.contains("분할선")) {
                appearanceInfo.setDividingLine(value);
            } else if (label.contains("식별표기")) {
                appearanceInfo.setIdentificationMark(value);
            }
        }
        return appearanceInfo;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package com.nadoyagsa.pillaroid.controller;

import com.nadoyagsa.pillaroid.dto.Medicine;
import com.nadoyagsa.pillaroid.service.MedicineService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import javax.servlet.http.HttpServletRequest;

/**
 * REST endpoints under {@code /medicine}; every request is delegated to {@link MedicineService}.
 */
@RestController
@RequestMapping("/medicine")
public class MedicineController {
    private final MedicineService medicineService;

    @Autowired
    public MedicineController(MedicineService medicineService) {
        this.medicineService = medicineService;
    }

    /**
     * Handles {@code GET /medicine/info}.
     *
     * @param request the incoming servlet request (currently not read)
     * @return whatever the service returns for the medicine lookup
     */
    // TODO: later this must receive the medicine name or the item serial number
    @GetMapping(path = "/info")
    public Medicine getMedicineInfo(HttpServletRequest request) {
        final Medicine medicineInfo = medicineService.getMedicineInfo();
        return medicineInfo;
    }
}
19 changes: 19 additions & 0 deletions src/main/java/com/nadoyagsa/pillaroid/dto/Appearance.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.nadoyagsa.pillaroid.dto;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Appearance details of a medicine, one field per labelled item of the appearance section.
 *
 * <p>Lombok generates the accessors, the no-args and all-args constructors and the builder.
 * The all-args constructor takes its parameters in field declaration order, so the fields
 * must not be reordered.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class Appearance {
private String appearance; // physical description (label "성상")
private String formulation; // dosage form (label "제형")
private String shape; // shape (label "모양")
private String color; // color (label "색상")
private String dividingLine; // dividing/score line (label "분할선")
private String identificationMark; // identification marking (label "식별표기")
}
16 changes: 16 additions & 0 deletions src/main/java/com/nadoyagsa/pillaroid/dto/Medicine.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package com.nadoyagsa.pillaroid.dto;

import lombok.*;

/**
 * Detail information of a single medicine, one field per section of the medicine page.
 *
 * <p>Lombok generates the accessors, the no-args and all-args constructors and the builder.
 * The all-args constructor takes its parameters in field declaration order, so the fields
 * must not be reordered.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class Medicine {
private Appearance appearanceInfo; // appearance information (section "외형정보")
private String ingredient; // ingredient information (section "성분정보")
private String save; // storage method (section "저장방법")
private String efficacy; // efficacy and effect (section "효능효과")
private String usage; // dosage and administration (section "용법용량")
private String precautions; // precautions for use (section "사용상 주의사항")
}
20 changes: 20 additions & 0 deletions src/main/java/com/nadoyagsa/pillaroid/service/MedicineService.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package com.nadoyagsa.pillaroid.service;

import com.nadoyagsa.pillaroid.component.JsoupComponent;
import com.nadoyagsa.pillaroid.dto.Medicine;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

/**
 * Service layer for medicine lookups; currently a thin delegate to {@link JsoupComponent}.
 */
@Service
public class MedicineService {
    private final JsoupComponent jsoupComponent;

    @Autowired
    public MedicineService(JsoupComponent jsoupComponent) {
        this.jsoupComponent = jsoupComponent;
    }

    /**
     * Fetches the medicine information through the crawler component.
     *
     * @return the value produced by {@link JsoupComponent#getMedicineInfo()}, passed through
     *         unchanged
     */
    public Medicine getMedicineInfo() {
        final Medicine crawled = jsoupComponent.getMedicineInfo();
        return crawled;
    }
}

0 comments on commit 1d70ff5

Please sign in to comment.