Skip to content

Commit

Permalink
Indicate whether a redirect happened when retrieving info about the l…
Browse files Browse the repository at this point in the history
…ink, replace url with final url loaded. Retrieve canonical URL if specified.
  • Loading branch information
megamattron committed Oct 19, 2016
1 parent d894f48 commit d99f72f
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 5 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,8 @@ Currently this library retrieves the basic information for a link like title, de
# Tag documentation

* Twitter Cards: https://dev.twitter.com/cards/markup - Photo specific: https://dev.twitter.com/cards/types/photo
* Facebook OpenGraph: http://ogp.me/
* Facebook OpenGraph: http://ogp.me/

# Build instructions

Just run ```gradle jar```
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
group 'com.larvalabs'
version '0.1'
version '0.11'

apply plugin: 'java'

Expand Down
27 changes: 27 additions & 0 deletions src/main/java/com/larvalabs/linkunfurl/LinkInfo.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
public class LinkInfo {

private String url;
private boolean redirected = false;
private String canonicalUrl;
private String type;
private String site;
private String title;
Expand All @@ -19,6 +21,11 @@ public class LinkInfo {
private Integer videoWidth;
private Integer videoHeight;

/**
 * Returns the URL that the content was ultimately loaded from.
 *
 * <p>When {@link #isRedirected()} is true, one or more redirects were followed
 * to reach this URL.
 *
 * @return the final URL the content was loaded from
 */
public String getUrl() {
    return this.url;
}
Expand All @@ -27,6 +34,26 @@ public void setUrl(String url) {
this.url = url;
}

/**
 * Reports whether redirects were needed to reach the URL the content was loaded from.
 * The flag is {@code false} unless changed through {@link #setRedirected(boolean)}.
 *
 * @return {@code true} if the redirected flag has been set, {@code false} otherwise
 */
public boolean isRedirected() {
    return this.redirected;
}

/**
 * Records whether redirects were followed while retrieving the link.
 *
 * @param redirected the value subsequently reported by {@link #isRedirected()}
 */
public void setRedirected(boolean redirected) {
this.redirected = redirected;
}

/**
 * Returns the canonical URL reported by the site, usually by way of a
 * {@code <link rel='canonical'/>} tag.
 *
 * @return the canonical URL, or {@code null} if none has been set
 */
public String getCanonicalUrl() {
    return this.canonicalUrl;
}

/**
 * Stores the canonical URL reported by the site.
 *
 * @param canonicalUrl the value subsequently returned by {@link #getCanonicalUrl()}
 */
public void setCanonicalUrl(String canonicalUrl) {
this.canonicalUrl = canonicalUrl;
}

public String getType() {
return type;
}
Expand Down
20 changes: 17 additions & 3 deletions src/main/java/com/larvalabs/linkunfurl/LinkUnfurl.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.helper.HttpConnection;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

Expand Down Expand Up @@ -64,27 +65,40 @@ public static LinkInfo unfurl(String url, int timeoutMillis) throws IOException
.followRedirects(true)
.execute();

String finalUrl = response.url().toString();
boolean didRedirect = !finalUrl.equals(url);

String contentType = response.contentType();
if (contentType != null && contentType.toLowerCase().contains(TYPE_IMAGE)) {
// Don't attempt to parse, this is an image file - set as hero image
LinkInfo info = new LinkInfo();
info.setType(TYPE_IMAGE);
info.setImageUrl(url);
info.setImageUrl(finalUrl);
info.setRedirected(didRedirect);
setImageLength(info);
return info;
} else if (contentType != null && contentType.toLowerCase().contains(TYPE_VIDEO)) {
// Don't attempt to parse, this is a direct video file link
LinkInfo info = new LinkInfo();
info.setType(TYPE_VIDEO);
info.setVideoUrl(url);
info.setVideoUrl(finalUrl);
info.setRedirected(didRedirect);
setImageLength(info);
return info;
}

Document document = response.parse();

LinkInfo info = new LinkInfo();
info.setUrl(url);
info.setUrl(finalUrl);
info.setRedirected(didRedirect);

{
Elements elements = document.select("link[rel='canonical']");
if (elements.size() > 0) {
info.setCanonicalUrl(elements.get(0).attr("href"));
}
}

{
String twitterTitle = getMetaElementIfExists(document, METANAME_TWITTER_TITLE);
Expand Down
12 changes: 12 additions & 0 deletions src/test/java/com/larvalabs/linkunfurl/UnfurlTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,18 @@ public void testInstagramVideo() throws Exception {
assertEquals("video", info.getType());
}

/**
 * Unfurls a shortened link and checks that the reported URL is the final
 * destination rather than the shortened address that was requested.
 */
@Test
public void testShortenedLinkRedirect() throws Exception {
    String shortenedUrl = "https://t.co/vnEZZzmsGF";
    String expectedFinalUrl =
            "http://us.battle.net/sc2/en/blog/20325209/blizzcon-in-game-goodies-dva-announcer-and-portrait-10-17-2016";

    LinkInfo unfurled = LinkUnfurl.unfurl(shortenedUrl, 30000);

    assertEquals(expectedFinalUrl, unfurled.getUrl());
}

/**
 * Unfurls an article page and checks that the canonical URL it reports
 * matches the address that was requested.
 */
@Test
public void testCanonicalUrl() throws Exception {
    // The requested page is expected to declare itself as its own canonical URL.
    String articleUrl =
            "http://www.ibtimes.co.uk/zetta-spanish-phone-brand-under-fire-allegedly-passing-off-rebranded-xiaomi-phones-their-own-1586988";

    LinkInfo unfurled = LinkUnfurl.unfurl(articleUrl, 30000);

    assertEquals(articleUrl, unfurled.getCanonicalUrl());
}

@Test
public void testDirectVideoUrl() throws Exception {
String url = "http://www.html5videoplayer.net/videos/toystory.mp4";
Expand Down

0 comments on commit d99f72f

Please sign in to comment.