Skip to content
Browse files

prepare for first OSSRH release

  • Loading branch information...
1 parent aee143a commit 6317770e8b9411d0b6f31ed2e94eee74aea94848 @kylewm committed Jul 5, 2015
Showing with 172 additions and 45 deletions.
  1. +23 −1 README.md
  2. +90 −7 pom.xml
  3. +59 −37 src/main/java/com/kylewm/mf2j/Mf2Parser.java
View
24 README.md
@@ -2,7 +2,7 @@
In-development Java parser for microformats2. All the hard work is done by [Jsoup](http://jsoup.org/).
-Supports:
+Supports:
- basic property types
- implied properties
- rel-urls hash
@@ -14,3 +14,25 @@ TODO:
Live version: https://mf2j.herokuapp.com/?url=http://kylewm.com
Requirements: Java 1.5+
+
+## Installation
+
+```xml
+<dependency>
+ <groupId>com.kylewm</groupId>
+ <artifactId>mf2j</artifactId>
+ <version>0.0.4</version>
+</dependency>
+```
+
+## Usage
+
+```java
+import com.kylewm.mf2j.Mf2Parser;
+...
+
+Mf2Parser parser = new Mf2Parser()
+ .setIncludeAlternates(true)
+ .setIncludeRelUrls(true);
+Map<String,Object> parsed = parser.parse(new URI("https://kylewm.com"));
+```
View
97 pom.xml
@@ -2,13 +2,96 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.kylewm</groupId>
<artifactId>mf2j</artifactId>
- <version>0.0.4-SNAPSHOT</version>
- <name>Microformats2 Parser for Java</name>
+ <version>0.0.4</version>
+ <packaging>jar</packaging>
+ <name>mf2j</name>
+ <description>Microformats2 Parser for Java</description>
+ <url>https://github.com/kylewm/mf2j</url>
+
+ <licenses>
+ <license>
+ <name>CC0 1.0 Universal</name>
+ <url>http://creativecommons.org/publicdomain/zero/1.0/</url>
+ <distribution>repo</distribution>
+ </license>
+ </licenses>
+
+ <developers>
+ <developer>
+ <name>Kyle Mahan</name>
+ <email>[email protected]</email>
+ <url>https://kylewm.com</url>
+ </developer>
+ </developers>
+
+ <scm>
+ <connection>scm:git:git@github.com:kylewm/mf2j.git</connection>
+ <developerConnection>scm:git:git@github.com:kylewm/mf2j.git</developerConnection>
+ <url>git@github.com:kylewm/mf2j.git</url>
+ </scm>
+
<dependencies>
- <dependency>
- <groupId>org.jsoup</groupId>
- <artifactId>jsoup</artifactId>
- <version>1.8.2</version>
- </dependency>
+ <dependency>
+ <groupId>org.jsoup</groupId>
+ <artifactId>jsoup</artifactId>
+ <version>1.8.2</version>
+ </dependency>
</dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-gpg-plugin</artifactId>
+ <version>1.5</version>
+ <executions>
+ <execution>
+ <id>sign-artifacts</id>
+ <phase>verify</phase>
+ <goals>
+ <goal>sign</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-source-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>attach-sources</id>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>attach-javadocs</id>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+<!--
+ <plugin>
+ <groupId>org.sonatype.plugins</groupId>
+ <artifactId>nexus-staging-maven-plugin</artifactId>
+ <version>1.6.3</version>
+ <extensions>true</extensions>
+ <configuration>
+ <serverId>ossrh</serverId>
+ <nexusUrl>https://oss.sonatype.org/</nexusUrl>
+ <autoReleaseAfterClose>true</autoReleaseAfterClose>
+ </configuration>
+ </plugin>
+-->
+ </plugins>
+ </build>
+
</project>
View
96 src/main/java/com/kylewm/mf2j/Mf2Parser.java
@@ -21,26 +21,52 @@
private boolean includeAlternates;
private boolean includeRelUrls;
+ /**
+ * Constructor
+ */
public Mf2Parser() {
this.includeAlternates = true;
this.includeRelUrls = false;
}
-
+
+ /**
+ * If true, include the "alternates" key in the parsed output.
+ * @param includeAlts true to include the "alternates" key in the parsed output
+ * @return this, for method chaining
+ */
public Mf2Parser setIncludeAlternates(boolean includeAlts) {
this.includeAlternates = includeAlts;
return this;
}
-
+
+ /**
+ * If true, include the experimental "rel-urls" hash in the parsed output.
+ * @param includeRelUrls true to include the experimental "rel-urls" hash in the parsed output
+ * @return this, for method chaining
+ */
public Mf2Parser setIncludeRelUrls(boolean includeRelUrls) {
this.includeRelUrls = includeRelUrls;
return this;
}
+ /**
+ * Fetch a remote resource and parse it for microformats2.
+ * @param resource the URI of the resource to fetch.
+ * @return a well-defined JSON structure containing the parsed microformats2 data.
+ * @throws IOException if fetching the resource fails
+ */
public JsonDict parse(URI resource) throws IOException {
Document text = Jsoup.connect(resource.toString()).get();
return parse(text, resource);
}
+ /**
+ * Parse an existing document for microformats2.
+ * @param html the contents of the document to parse
+ * @param baseUri the URI where the document exists, used for normalization
+ * @return a well-defined JSON structure containing the parsed
+ * microformats2 data.
+ */
public JsonDict parse(String html, URI baseUri) {
Document doc = Jsoup.parse(html);
return parse(doc, baseUri);
@@ -54,13 +80,20 @@ private URI findBaseUri(Document doc, URI baseUri) {
return baseUri;
}
+ /**
+ * Parse an existing document for microformats2.
+ * @param doc the Jsoup document to parse
+ * @param baseUri the URI where the document exists, used for normalization
+ * @return a well-defined JSON structure containing the parsed
+ * microformats2 data.
+ */
public JsonDict parse(Document doc, URI baseUri) {
baseUri = findBaseUri(doc, baseUri);
-
+
JsonDict dict = new JsonDict();
JsonList items = dict.getOrCreateList("items");
parseMicroformats(doc, baseUri, items);
-
+
parseRels(doc, baseUri, dict);
return dict;
}
@@ -70,12 +103,12 @@ private void parseRels(Document doc, URI baseUri, JsonDict dict) {
if (includeRelUrls) {
dict.getOrCreateDict("rel-urls");
}
-
+
for (Element link : doc.select("a[rel][href],link[rel][href]")) {
String relStr = link.attr("rel");
String href = link.attr("href");
href = baseUri.resolve(href).toString();
-
+
JsonList rels = new JsonList();
for (String rel : relStr.split(" ")) {
rel = rel.trim();
@@ -143,7 +176,7 @@ private JsonDict parseMicroformat(Element elem, URI baseUri) {
parseProperties(child, baseUri, itemDict);
}
- if (!properties.containsKey("name")) {
+ if (!properties.containsKey("name")) {
String impliedName = parseImpliedName(elem);
if (impliedName != null) {
JsonList implNameList = new JsonList();
@@ -167,7 +200,7 @@ private JsonDict parseMicroformat(Element elem, URI baseUri) {
properties.put("photo", implPhotoList);
}
}
-
+
return itemDict;
}
@@ -279,8 +312,8 @@ private String parseDateTimeProperty(Element elem) {
return elem.attr("value");
}
return elem.text().trim();
-
-
+
+
}
private JsonDict parseHtmlProperty(Element elem) {
@@ -289,28 +322,28 @@ private JsonDict parseHtmlProperty(Element elem) {
dict.put("text", elem.text());
return dict;
}
-
+
private String parseImpliedPhoto(Element elem, URI baseUri) {
String href = parseImpliedPhotoRelative(elem);
if (href != null) {
return baseUri.resolve(href).toString();
}
return null;
}
-
+
private String parseImpliedPhotoRelative(Element elem) {
String[][] tagAttrs = {
{"img", "src"},
{"object", "data"},
};
-
+
for (String[] tagAttr : tagAttrs) {
String tag = tagAttr[0], attr = tagAttr[1];
if (tag.equals(elem.tagName()) && elem.hasAttr(attr)) {
return elem.attr(attr);
}
}
-
+
for (String[] tagAttr : tagAttrs) {
String tag = tagAttr[0], attr = tagAttr[1];
Elements children = filterByTag(elem.children(), tag);
@@ -321,7 +354,7 @@ private String parseImpliedPhotoRelative(Element elem) {
}
}
}
-
+
Elements children = elem.children();
if (children.size() == 1) {
Element child = children.first();
@@ -336,7 +369,7 @@ private String parseImpliedPhotoRelative(Element elem) {
}
}
}
-
+
return null;
}
@@ -350,12 +383,12 @@ private String parseImpliedUrl(Element elem, URI baseUri) {
private String parseImpliedUrlRelative(Element elem) {
// if a.h-x[href] or area.h-x[href] then use that [href] for url
- if (("a".equals(elem.tagName()) || "area".equals(elem.tagName()))
- && elem.hasAttr("href")) {
+ if (("a".equals(elem.tagName()) || "area".equals(elem.tagName()))
+ && elem.hasAttr("href")) {
return elem.attr("href");
}
//else if .h-x>a[href]:only-of-type:not[.h-*] then use that [href] for url
- //else if .h-x>area[href]:only-of-type:not[.h-*] then use that [href] for url
+ //else if .h-x>area[href]:only-of-type:not[.h-*] then use that [href] for url
for (String childTag : Arrays.asList("a", "area")) {
Elements children = filterByTag(elem.children(), childTag);
if(children.size() == 1) {
@@ -365,25 +398,25 @@ private String parseImpliedUrlRelative(Element elem) {
}
}
}
-
+
return null;
}
-
+
private String parseImpliedName(Element elem) {
if (("img".equals(elem.tagName()) || ("area".equals(elem.tagName())) && elem.hasAttr("alt"))) {
return elem.attr("alt");
}
if ("abbr".equals(elem.tagName()) && elem.hasAttr("title")) {
return elem.attr("title");
}
-
+
Elements children = elem.children();
if (children.size() == 1) {
Element child = children.first();
// else if .h-x>img:only-child[alt]:not[.h-*] then use that img alt for name
// else if .h-x>area:only-child[alt]:not[.h-*] then use that area alt for name
- if (!hasRootClass(child)
+ if (!hasRootClass(child)
&& ("img".equals(child.tagName()) || "area".equals(child.tagName()))
&& child.hasAttr("alt")) {
return child.attr("alt");
@@ -398,7 +431,7 @@ private String parseImpliedName(Element elem) {
Element grandChild = grandChildren.first();
// else if .h-x>:only-child>img:only-child[alt]:not[.h-*] then use that img alt for name
// else if .h-x>:only-child>area:only-child[alt]:not[.h-*] then use that area alt for name
- if (!hasRootClass(grandChild)
+ if (!hasRootClass(grandChild)
&& ("img".equals(grandChild.tagName()) || "area".equals(grandChild.tagName()))
&& grandChild.hasAttr("alt")) {
return grandChild.attr("alt");
@@ -409,13 +442,13 @@ private String parseImpliedName(Element elem) {
}
}
}
-
+
// else use the textContent of the .h-x for name
// drop leading & trailing white-space from name, including nbsp
return elem.text().trim();
}
-
+
private Elements filterByTag(Elements elems, String tag) {
Elements filtered = new Elements();
@@ -453,15 +486,4 @@ private boolean isRootClass(String className) {
return className.startsWith("h-");
}
- public static void main(String args[]) throws IOException, URISyntaxException {
- Mf2Parser p = new Mf2Parser()
- .setIncludeAlternates(true)
- .setIncludeRelUrls(true);
- JsonDict result = p.parse(
- "<link rel=\"alternate feed\" type=\"application/rdf+xml\" href=\"http://example.com/feed.xml\"/><a rel=\"me\" class=\"h-card\" href=\"/testing/me#profile\"><img src=\"/static/img/profile.jpg\"/>Kyle Mahan</a>",
- new URI("https://kylewm.com"));
- //JsonDict result = p.parse(new URI("https://kylewm.com"));
- System.out.println(result);
- }
-
}

0 comments on commit 6317770

Please sign in to comment.
Something went wrong with that request. Please try again.