AO3-4359 Mass import bookmarks #2442
| @@ -143,12 +136,22 @@ class StoryParser | ||
| end | ||
| describe "#download_and_parse_chapters_into_story" do | ||
| + |
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| valid_headers | ||
| - parsed_body = JSON.parse(response.body) | ||
| - expect(parsed_body.first["status"]).to eq "ok" | ||
| - expect(parsed_body.first["work_url"]).to eq work_url(@work) | ||
| - expect(parsed_body.first["created"]).to eq @work.created_at.as_json | ||
| + parsed_body = JSON.parse(response.body, symbolize_names: true) | ||
| + expect(parsed_body.first[:status]).to eq "ok" | ||
| + expect(parsed_body.first[:work_url]).to eq work_url(@work) | ||
| + expect(parsed_body.first[:created]).to eq @work.created_at.as_json | ||
| + end | ||
| + | ||
| + it "should return the original reference if one was provided" do | ||
| + post "/api/v1/works/urls", | ||
| + { original_urls: [ | ||
| + { id: "123", url: "foo" } |
|
Use 2 spaces for indentation in an array, relative to the start of the line where the left square bracket is.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| end | ||
| + |
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| @@ -0,0 +1,67 @@ | ||
| +require 'spec_helper' | ||
| +require 'webmock/rspec' | ||
| + | ||
| +module ApiHelper | ||
| + |
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| + pseud_id = @user.default_pseud.id | ||
| + post "/api/v1/bookmarks/import", | ||
| + { archivist: @user.login, | ||
| + bookmarks: [ bookmark ] | ||
| + }.to_json, | ||
| + valid_headers | ||
| + first_bookmark = Bookmark.find_all_by_pseud_id(pseud_id).first | ||
| + bookmark_response = JSON.parse(response.body, symbolize_names: true)[:bookmarks].first | ||
| + assert_equal bookmark_response[:archive_url], bookmark_url(first_bookmark) | ||
| + end | ||
| + | ||
| + WebMock.allow_net_connect! | ||
| + end | ||
| + | ||
| + describe "Invalid API bookmark import" do | ||
| + before do |
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| + @user = create(:user) | ||
| + end | ||
| + | ||
| + after do | ||
| + WebMock.reset! | ||
| + end | ||
| + | ||
| + it "should return 400 Bad Request if an invalid URL is specified" do | ||
| + post "/api/v1/import", | ||
| + { archivist: @user.login, | ||
| + bookmarks: [ bookmark.merge!( { url: "" } ) ] }.to_json, | ||
| + valid_headers | ||
| + assert_equal 400, response.status | ||
| + end | ||
| + | ||
| + it "should return 400 Bad Request if no bookmarks are specified" do |
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| + end | ||
| + | ||
| + WebMock.allow_net_connect! | ||
| + end | ||
| + | ||
| + describe "Invalid API bookmark import" do | ||
| + before do | ||
| + mock_external | ||
| + @user = create(:user) | ||
| + end | ||
| + | ||
| + after do | ||
| + WebMock.reset! | ||
| + end | ||
| + | ||
| + it "should return 400 Bad Request if an invalid URL is specified" do |
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| + end | ||
| + | ||
| + describe "Invalid API bookmark import" do | ||
| + before do | ||
| + mock_external | ||
| + @user = create(:user) | ||
| + end | ||
| + | ||
| + after do | ||
| + WebMock.reset! | ||
| + end | ||
| + | ||
| + it "should return 400 Bad Request if an invalid URL is specified" do | ||
| + post "/api/v1/import", | ||
| + { archivist: @user.login, | ||
| + bookmarks: [ bookmark.merge!( { url: "" } ) ] }.to_json, |
|
Redundant curly braces around a hash parameter.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| @@ -0,0 +1,132 @@ | ||
| +require 'spec_helper' | ||
| +require 'api/api_helper' | ||
| + | ||
| +describe "API BookmarksController" do | ||
| + include ApiHelper | ||
| + | ||
| + # Override is_archivist so all users are archivists from this point on | ||
| + class User < ActiveRecord::Base | ||
| + def is_archivist? | ||
| + true | ||
| + end | ||
| + end | ||
| + | ||
| + bookmark = { | ||
| + id: "123", |
|
Use 2 spaces for indentation in a hash, relative to the start of the line where the left curly brace is.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| @@ -460,7 +460,11 @@ | ||
| namespace :api do | ||
| namespace :v1 do | ||
| - resources :import, only: [:create], defaults: { format: :json } | ||
| + resources :bookmarks, only: [:create], defaults: { format: :json} |
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| @@ -65,7 +65,7 @@ | ||
| namespace :deploy do | ||
| desc "Restart the unicorns" | ||
| task :restart do | ||
| - find_servers(:roles => :app).each do |server| | ||
| + find_servers(:roles => [ :app, :web ]).each do |server| |
|
Use the new Ruby 1.9 hash syntax.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| + original_id: external_work[:id], | ||
| + original_url: original_url, | ||
| + messages: work_messages | ||
| + } | ||
| + end | ||
| + | ||
| + # Work-level error handling for requests that are incomplete or too large | ||
| + def work_errors(work) | ||
| + status = :bad_request | ||
| + errors = [] | ||
| + urls = work[:chapter_urls] | ||
| + if urls.nil? | ||
| + errors << "This work doesn't contain chapter_urls. Works can only be imported from publicly-accessible URLs." | ||
| + elsif urls.length >= ArchiveConfig.IMPORT_MAX_CHAPTERS | ||
| + errors << "This work contains too many chapter URLs. A maximum of #{ ArchiveConfig.IMPORT_MAX_CHAPTERS }" + | ||
| + "chapters can be imported per work." |
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| + url: work_url, | ||
| + original_id: external_work[:id], | ||
| + original_url: original_url, | ||
| + messages: work_messages | ||
| + } | ||
| + end | ||
| + | ||
| + # Work-level error handling for requests that are incomplete or too large | ||
| + def work_errors(work) | ||
| + status = :bad_request | ||
| + errors = [] | ||
| + urls = work[:chapter_urls] | ||
| + if urls.nil? | ||
| + errors << "This work doesn't contain chapter_urls. Works can only be imported from publicly-accessible URLs." | ||
| + elsif urls.length >= ArchiveConfig.IMPORT_MAX_CHAPTERS | ||
| + errors << "This work contains too many chapter URLs. A maximum of #{ ArchiveConfig.IMPORT_MAX_CHAPTERS }" + |
|
Use
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| + status = :bad_request | ||
| + errors = [] | ||
| + urls = work[:chapter_urls] | ||
| + if urls.nil? | ||
| + errors << "This work doesn't contain chapter_urls. Works can only be imported from publicly-accessible URLs." | ||
| + elsif urls.length >= ArchiveConfig.IMPORT_MAX_CHAPTERS | ||
| + errors << "This work contains too many chapter URLs. A maximum of #{ ArchiveConfig.IMPORT_MAX_CHAPTERS }" + | ||
| + "chapters can be imported per work." | ||
| + end | ||
| + status = :ok if errors.empty? | ||
| + [status, errors] | ||
| + end | ||
| + | ||
| + # send invitations to external authors for a given set of works | ||
| + def send_external_invites(works, archivist) | ||
| + external_authors = works.collect(&:external_authors).flatten.uniq |
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| private | ||
| + # Set messages based on success and error flags | ||
| + def response_message(messages) | ||
| + if @some_success && @some_errors |
|
Use the return of the conditional for variable assignment and comparison.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| + external_bookmarks.each do |external_bookmark| | ||
| + bookmarks_responses << import_bookmark(archivist, external_bookmark) | ||
| + end | ||
| + | ||
| + # set final response code and message depending on the flags | ||
| + messages = response_message(messages) | ||
| + end | ||
| + | ||
| + render status: status, json: { status: status, messages: messages, bookmarks: bookmarks_responses } | ||
| + end | ||
| + | ||
| + private | ||
| + | ||
| + # Set messages based on success and error flags | ||
| + def response_message(messages) | ||
| + if @some_success && @some_errors |
|
Use the return of the conditional for variable assignment and comparison.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| + # if no works are supplied. If there is neither a valid archivist nor valid works, a 400 is returned | ||
| + # with both errors as a message | ||
| + def batch_errors(archivist, import_items) | ||
| + status = :bad_request | ||
| + errors = [] | ||
| + | ||
| + unless archivist && archivist.is_archivist? | ||
| + status = :forbidden | ||
| + errors << "The 'archivist' field must specify the name of an Archive user with archivist privileges." | ||
| + end | ||
| + | ||
| + if import_items.nil? || import_items.empty? | ||
| + errors << "No items to import were provided." | ||
| + elsif import_items.size >= ArchiveConfig.IMPORT_MAX_WORKS_BY_ARCHIVIST | ||
| + errors << "This request contains too many items to import. A maximum of #{ ArchiveConfig.IMPORT_MAX_WORKS_BY_ARCHIVIST }" + | ||
| + "items can be imported in one go by an archivist." |
|
Really minor complaint -- can this be rephrased to "at one time"/"at once"?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| + # Top-level error handling: returns a 403 forbidden if a valid archivist isn't supplied and a 400 | ||
| + # if no works are supplied. If there is neither a valid archivist nor valid works, a 400 is returned | ||
| + # with both errors as a message | ||
| + def batch_errors(archivist, import_items) | ||
| + status = :bad_request | ||
| + errors = [] | ||
| + | ||
| + unless archivist && archivist.is_archivist? | ||
| + status = :forbidden | ||
| + errors << "The 'archivist' field must specify the name of an Archive user with archivist privileges." | ||
| + end | ||
| + | ||
| + if import_items.nil? || import_items.empty? | ||
| + errors << "No items to import were provided." | ||
| + elsif import_items.size >= ArchiveConfig.IMPORT_MAX_WORKS_BY_ARCHIVIST | ||
| + errors << "This request contains too many items to import. A maximum of #{ ArchiveConfig.IMPORT_MAX_WORKS_BY_ARCHIVIST }" + |
|
Use
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
bats eyelashes Can you include a link to the issue in the message at the top, please? (And edit the issue to have 0.9.34 as the release for now, as long as you're there?) Thanks!
Doh, I have no explanation for why I keep forgetting to put the ticket in the pull request.
| + bookmarks_responses << import_bookmark(archivist, external_bookmark) | ||
| + end | ||
| + | ||
| + # set final response code and message depending on the flags | ||
| + messages = response_message(messages) | ||
| + end | ||
| + | ||
| + render status: status, json: { status: status, messages: messages, bookmarks: bookmarks_responses } | ||
| + end | ||
| + | ||
| + private | ||
| + | ||
| + # Set messages based on success and error flags | ||
| + def response_message(messages) | ||
| + if @some_success && @some_errors | ||
| + messages << "At least one bookmark was not created. Please check the bookmark array for further information." |
|
Will an archivist know what the "bookmark array" is? (I ask mostly because I'm not looking at the page and I don't know.) Good point - that's an implementation detail that doesn't need to be in the message. The developer who implements the API will presumably turn the JSON array into something more user friendly, so I'll rephrase that as "individual bookmark messages".
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| @@ -0,0 +1,117 @@ | ||
| +class Api::V1::BookmarksController < Api::V1::BaseController |
|
Shouldn't this be ExternalBookmarksController? We may well want to add API for regular bookmarks at some point With all my focus on the Open Doors use case, I hadn't thought of the two bookmark types possibly being used in future - I'll rename this and have a think about whether the URL end-point needs to be changed as well. (i.e. would it still make sense to POST both external and internal bookmarks to Hmm, I was thinking about External Works, I realize now. And the actual bookmark object is the same, so all bookmark creation makes sense in BookmarksController, but where the process of creating external works belongs... IDK, probably it should be here after all and just made conditional on whether the url passed is for an AO3 work or not, that seems most parallel to the existing process since External Works controller just makes the new external work object and passes off the actual creation to the Bookmarks controller via the form. As discussed previously, I've added some error handling. As a general rule, I like to delegate as much as possible to the back-end when writing an API, as the model should be performing essential data validation before anything goes into the database, and this theoretically ensures consistency between the UI and the API. However, the ExternalWorks model doesn't check for the presence of a fandom and returns a weird message for a missing author, so I've intercepted some invalid requests in the API controller to return helpful messages from there instead, and added tests to check that this is happening.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| + { archivist: @user.login, | ||
| + bookmarks: [ bookmark, bookmark ] | ||
| + }.to_json, | ||
| + valid_headers | ||
| + assert_equal 200, response.status | ||
| + end | ||
| + | ||
| + it "should create bookmarks associated with the archivist" do | ||
| + pseud_id = @user.default_pseud.id | ||
| + post "/api/v1/bookmarks/import", | ||
| + { archivist: @user.login, | ||
| + bookmarks: [ bookmark, bookmark ] | ||
| + }.to_json, | ||
| + valid_headers | ||
| + bookmarks = Bookmark.find_all_by_pseud_id(pseud_id) | ||
| + assert_equal bookmarks.count, 2 |
|
I'm confused by these two last specs. I assumed from the first one that duplicate bookmarks wouldn't be created, and the spec was therefore testing that it would return 200 even if only one bookmark were created. Then the second one seems to expect both to be created? In which case the first spec doesn't seem to be testing "when only some are created". My gut feeling is an exact duplicate should ideally not be created? So bookmarks.count should be 1 not 2? Good point about the "some" bookmarks spec. When I originally wrote the works controller, I returned different HTTP responses for these different states (all, none or some created). But as they're application errors, not HTTP errors, I now think 200 is the code to use. Anyway, I copy-pasted those into the bookmarks spec and forgot to clean up. I'll remove that test altogether. The second one does expect duplicates to be created because there doesn't seem to be a de-duping mechanism in the Bookmark model. If I bookmark the same work multiple times, I get multiple bookmarks (even though only one is listed under the work, interestingly). This may indeed be a unintended flaw in the Bookmark model, but it feels as if fixing it is beyond the scope of this particular ticket which aims to reproduce the web behaviour in the API (though a separate one which fixes the model and this test sounds like a good idea!) FWIW, my inclination would be it's more urgent to avoid creating the duplicates here because eg if an import fails halfway you would ideally like to be able to re-run without worrying about creating duplicate bookmarks? Good point. I'll see if it's easy to do without delving too intimately into the Bookmark model - for example, by searching for a bookmark owned by the archivist which already has that URL (sounds easy enough but we'll see!)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| + url = bookmark_request[:external][:url] | ||
| + if url.nil? | ||
| + errors << "This bookmark does not contain a URL to an external site." | ||
| + end | ||
| + | ||
| + archivist_bookmarks = Bookmark.find_all_by_pseud_id(archivist.default_pseud.id) | ||
| + | ||
| + unless archivist_bookmarks.empty? | ||
| + archivist_bookmarks.each do |bookmark| | ||
| + if bookmark.bookmarkable_type == "ExternalWork" && ExternalWork.find(bookmark.bookmarkable_id).url == url | ||
| + errors << "There is already a bookmark for this archivist and the URL #{url}" | ||
| + end | ||
| + end | ||
| + end | ||
| + | ||
| + |
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| @@ -0,0 +1,128 @@ | ||
| +class Api::V1::BookmarksController < Api::V1::BaseController | ||
| + respond_to :json | ||
| + | ||
| + def create | ||
| + archivist = User.find_by_login(params[:archivist]) | ||
| + external_bookmarks = params[:bookmarks] |
|
here is where I think the naming is confusing me -- these are just bookmarks and they happen to be of external works. Can we just call them bookmarks throughout? it looks to me like this code would actually work just fine for a NON external work too. I'm not sure it would work for the other types as is, because the Having said that, I didn't inspect the other bookmark types, and it's been a while since I unpicked how bookmarks are created, so you might well be right. I'll rename that variable to make it less external-centric which will make it easier to adapt this if we have a use case for creating other types of bookmarks in future.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| + end | ||
| + | ||
| + describe "Invalid API bookmark import" do | ||
| + before do | ||
| + mock_external | ||
| + @user = create(:user) | ||
| + end | ||
| + | ||
| + after do | ||
| + WebMock.reset! | ||
| + end | ||
| + | ||
| + it "should return 400 Bad Request if an invalid URL is specified" do | ||
| + post "/api/v1/import", | ||
| + { archivist: @user.login, | ||
| + bookmarks: [ bookmark.merge!(url: "") ] |
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| + | ||
| + # Set messages based on success and error flags | ||
| + def response_message(messages) | ||
| + if @some_success && @some_errors | ||
| + messages << "At least one bookmark was not created. Please check the individual bookmark results for further information." | ||
| + elsif !@some_success && @some_errors | ||
| + messages << "None of the bookmarks were created. Please check the individual bookmark results for further information." | ||
| + else | ||
| + messages << "All bookmarks were successfully created." | ||
| + end | ||
| + messages | ||
| + end | ||
| + | ||
| + # Returns a hash | ||
| + def import_bookmark(archivist, params) | ||
| + bookmark_request = external_bookmark(archivist, params) |
|
Sorry to be nitpicky some more: it feels to me like even here it makes more sense to have this remain generic (eg "build_bookmark_request(user, params)") because the only place where it actually becomes specific to external works is in the currently named external_bookmark method and bookmark_errors (which is already named generically) -- and in those two places, I would suggest adding a comment explaining that for the moment this ONLY works for bookmarks for external works. There is basically no difference except if the "external" stuff is passed AFAIK.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| + unless archivist_bookmarks.empty? | ||
| + archivist_bookmarks.each do |bookmark| | ||
| + if bookmark.bookmarkable_type == "ExternalWork" && ExternalWork.find(bookmark.bookmarkable_id).url == url | ||
| + errors << "There is already a bookmark for this archivist and the URL #{url}" | ||
| + end | ||
| + end | ||
| + end | ||
| + | ||
| + status = :ok if errors.empty? | ||
| + [status, errors] | ||
| + end | ||
| + | ||
| + # Map Json request to Bookmark request | ||
| + def external_bookmark(archivist, params) | ||
| + { | ||
| + pseud_id: archivist.default_pseud.id, |
|
and last question here: what kind of cleaning is happening on this user input and where? I might suggest also a test or two in the spec of submitting bad/malicious content to make sure it doesn't get passed through.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
| + end | ||
| + | ||
| + WebMock.allow_net_connect! | ||
| + end | ||
| + | ||
| + describe "Invalid API bookmark import" do | ||
| + before do | ||
| + mock_external | ||
| + @user = create(:user) | ||
| + end | ||
| + | ||
| + after do | ||
| + WebMock.reset! | ||
| + end | ||
| + | ||
| + |
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
|
https://otwarchive.atlassian.net/browse/AO3-4359
Adds bookmark importing capability to the mass import API. Open Doors sometimes import sites which include works that are links to stories on another site which is not being imported (and therefore hasn't granted permission to scrape in the story contents). This will allow those works to be imported as bookmarks on the Archive, referencing the story's currently location rather than importing it.
In order to achieve this, I have also:
api/v1/import, we now haveapi/v1/worksandapi/v1/bookmarks(paving the way for, say,collections,usersor, idk,videosin future, whatever comes up next)original_idfield to all the API requests so the remote site can pass in its own id for the story/bookmark it's importing and use that to match up the Archive's response to the relevant item (currently, the OD temporary sites rely on the responses being in the same order as the requests which is risky)routes.rbto use Ruby 1.9 hashes where possible