Permalink
Browse files

[SPARK-18958][SPARKR] R API toJSON on DataFrame

## What changes were proposed in this pull request?

This makes it easier to integrate with other components that expect a row-based JSON format.
This replaces the non-public toJSON RDD API.

## How was this patch tested?

manual, unit tests

Author: Felix Cheung <[email protected]>

Closes #16368 from felixcheung/rJSON.
  • Loading branch information...
1 parent f252cb5 commit 17579bda3c114022a0b3889aa4c9188307af75e9 @felixcheung felixcheung committed with Felix Cheung Dec 23, 2016
Showing with 26 additions and 18 deletions.
  1. +1 −0 R/pkg/NAMESPACE
  2. +18 −12 R/pkg/R/DataFrame.R
  3. +7 −6 R/pkg/inst/tests/testthat/test_sparkSQL.R
View
@@ -133,6 +133,7 @@ exportMethods("arrange",
"summarize",
"summary",
"take",
+ "toJSON",
"transform",
"union",
"unionAll",
View
@@ -737,26 +737,32 @@ setMethod("repartition",
#' toJSON
#'
-#' Convert the rows of a SparkDataFrame into JSON objects and return an RDD where
-#' each element contains a JSON string.
+#' Converts a SparkDataFrame into a SparkDataFrame of JSON strings.
#'
-#' @param x A SparkDataFrame
-#' @return A StringRRDD of JSON objects
+#' Each row is turned into a JSON document with columns as different fields.
+#' The returned SparkDataFrame has a single character column with the name \code{value}.
+#'
+#' @param x a SparkDataFrame
+#' @return a SparkDataFrame
+#' @family SparkDataFrame functions
+#' @rdname toJSON
+#' @name toJSON
#' @aliases toJSON,SparkDataFrame-method
-#' @noRd
+#' @export
#' @examples
#'\dontrun{
#' sparkR.session()
-#' path <- "path/to/file.json"
-#' df <- read.json(path)
-#' newRDD <- toJSON(df)
+#' path <- "path/to/file.parquet"
+#' df <- read.parquet(path)
+#' df_json <- toJSON(df)
#'}
+#' @note toJSON since 2.2.0
setMethod("toJSON",
signature(x = "SparkDataFrame"),
function(x) {
- rdd <- callJMethod(x@sdf, "toJSON")
- jrdd <- callJMethod(rdd, "toJavaRDD")
- RDD(jrdd, serializedMode = "string")
+ jsonDS <- callJMethod(x@sdf, "toJSON")
+ df <- callJMethod(jsonDS, "toDF")
+ dataFrame(df)
})
#' Save the contents of SparkDataFrame as a JSON file
@@ -936,7 +942,7 @@ setMethod("unique",
#' Sample
#'
-#' Return a sampled subset of this SparkDataFrame using a random seed.
+#' Return a sampled subset of this SparkDataFrame using a random seed.
#' Note: this is not guaranteed to provide exactly the fraction specified
#' of the total count of the given SparkDataFrame.
#'
@@ -1689,12 +1689,13 @@ test_that("join(), crossJoin() and merge() on a DataFrame", {
unlink(jsonPath3)
})
-test_that("toJSON() returns an RDD of the correct values", {
- df <- read.json(jsonPath)
- testRDD <- toJSON(df)
- expect_is(testRDD, "RDD")
- expect_equal(getSerializedMode(testRDD), "string")
- expect_equal(collectRDD(testRDD)[[1]], mockLines[1])
+test_that("toJSON() on DataFrame", {
+ df <- as.DataFrame(cars)
+ df_json <- toJSON(df)
+ expect_is(df_json, "SparkDataFrame")
+ expect_equal(colnames(df_json), c("value"))
+ expect_equal(head(df_json, 1),
+ data.frame(value = "{\"speed\":4.0,\"dist\":2.0}", stringsAsFactors = FALSE))
})
test_that("showDF()", {

0 comments on commit 17579bd

Please sign in to comment.