Externalizer.java
/*
* #%L
* wcm.io
* %%
* Copyright (C) 2014 wcm.io
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
package io.wcm.handler.url.impl;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.sling.api.SlingHttpServletRequest;
import org.apache.sling.api.resource.ResourceResolver;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import io.wcm.sling.commons.util.Escape;
/**
 * Utility methods for externalizing URLs.
 * <p>
 * All methods are static and stateless. None of the {@code externalizeUrl*} methods checks whether the given URL is
 * already externalized; use {@link #isExternalized(String)} / {@link #isExternalizable(String)} beforehand if
 * that distinction matters.
 * </p>
 */
final class Externalizer {

  /*
   * Detect as externalized:
   * - everything starting with a protocol and a colon is handled as externalized (http:, tel:, mailto:, javascript: etc.)
   * - everything starting with // or # is handled as externalized
   * - all other strings are handled as not externalized
   * Note: at least one character has to follow the prefix, so a single "#" is not detected as externalized.
   */
  private static final Pattern EXTERNALIZED_PATTERN = Pattern.compile("^([^/]+:|//|#).+?");

  private static final String MANGLED_NAMESPACE_PREFIX = "/_";
  private static final String MANGLED_NAMESPACE_SUFFIX = "_";
  private static final char NAMESPACE_SEPARATOR = ':';
  // matches a path segment start followed by a namespace prefix, e.g. "/jcr:" (group 1 = "jcr")
  private static final Pattern NAMESPACE_PATTERN = Pattern.compile("/([^:/]+):");

  private Externalizer() {
    // static util methods only
  }

  /**
   * Externalizes a URL by applying Sling Mapping. Scheme and hostname that may have been added by the Sling
   * mapping are removed again, because they are added by the link handler depending on site URL configuration and
   * secure/non-secure mode.
   * @param url Unexternalized URL (without scheme or hostname). May have a query string or fragment appended, which
   *          is passed through unchanged.
   * @param resolver Resource resolver
   * @param request Request. If null, the mapping is applied without request context.
   * @return Externalized URL without scheme or hostname, but with short URLs (if configured in Sling Mapping), and
   *         the path is URL-encoded if it contains special chars. Null if the mapped URL has no path part.
   */
  public static @Nullable String externalizeUrl(@NotNull String url, @NotNull ResourceResolver resolver,
      @Nullable SlingHttpServletRequest request) {
    return externalizeUrlWithSlingMapping(url, resolver, request, false);
  }

  /**
   * Externalizes a URL by applying Sling Mapping. Scheme and hostname are kept if the Sling mapping added them.
   * @param url Non-externalized URL (without scheme or hostname). May have a query string or fragment appended,
   *          which is passed through unchanged.
   * @param resolver Resource resolver
   * @param request Request. If null, the mapping is applied without request context.
   * @return Externalized URL as returned by the Sling mapping (including scheme and hostname if the mapping added
   *         them), with short URLs (if configured in Sling Mapping), and the path is URL-encoded if it contains
   *         special chars.
   */
  public static @Nullable String externalizeUrlWithHost(@NotNull String url, @NotNull ResourceResolver resolver,
      @Nullable SlingHttpServletRequest request) {
    return externalizeUrlWithSlingMapping(url, resolver, request, true);
  }

  /**
   * Shared implementation of the Sling-Mapping-based externalization.
   * @param url URL to externalize, optionally with query string and/or fragment
   * @param resolver Resource resolver used for the mapping
   * @param request Request or null
   * @param keepHost If true the result of the Sling mapping is returned as is; if false only its raw (escaped)
   *          path is returned
   * @return Externalized URL with the original query string/fragment re-appended, or null if no path could be
   *         extracted from the mapped URL
   * @throws RuntimeException if keepHost is false and the Sling mapping returned a string that is not a valid URI
   */
  @SuppressWarnings("java:S112") // allow runtime exception
  private static @Nullable String externalizeUrlWithSlingMapping(@NotNull String url, @NotNull ResourceResolver resolver,
      @Nullable SlingHttpServletRequest request, boolean keepHost) {

    // apply externalization only path part
    String path = url;

    // split off query string or fragment that may be appended to the URL
    String urlRemainder = null;
    int urlRemainderPos = StringUtils.indexOfAny(path, '?', '#');
    if (urlRemainderPos >= 0) {
      urlRemainder = path.substring(urlRemainderPos);
      path = path.substring(0, urlRemainderPos);
    }

    // apply reverse mapping based on current sling mapping configuration for current request
    // e.g. to support a host-based prefix stripping mapping configuration configured at /etc/map

    // please note: the sling map method does a lot of things:
    // 1. applies reverse mapping depending on the sling mapping configuration
    // (this can even add a hostname if defined in sling mapping configuration)
    // 2. applies namespace mangling (e.g. replace jcr: with _jcr_)
    // 3. adds webapp context path if required
    // 4. url-encodes the whole url
    if (request != null) {
      path = resolver.map(request, path);
    }
    else {
      path = resolver.map(path);
    }

    if (!keepHost) {
      // remove scheme and hostname (probably added by sling mapping), but leave path in escaped form
      try {
        path = new URI(path).getRawPath();
        // replace %2F back to / for better readability (null-safe: a null path stays null)
        path = StringUtils.replace(path, "%2F", "/");
      }
      catch (URISyntaxException ex) {
        throw new RuntimeException("Sling map method returned invalid URI: " + path, ex);
      }
    }

    // build full URL again; path is null if the mapped URI had no path component
    if (path == null) {
      return null;
    }
    return path + (urlRemainder != null ? urlRemainder : "");
  }

  /**
   * Externalizes a URL without applying Sling Mapping. Instead the servlet context path is added and sling namespace
   * mangling is applied manually.
   * Hostname and scheme are not added because they are added by the link handler depending on site URL configuration
   * and secure/non-secure mode.
   * @param url Unexternalized URL (without scheme or hostname). May have a query string or fragment appended, which
   *          is passed through unchanged.
   * @param request Request. If null, no context path is prepended.
   * @return Externalized URL without scheme or hostname, the path is URL-encoded if it contains special chars.
   */
  public static @NotNull String externalizeUrlWithoutMapping(@NotNull String url, @Nullable SlingHttpServletRequest request) {

    // apply externalization only path part
    String path = url;

    // split off query string or fragment that may be appended to the URL
    String urlRemainder = null;
    int urlRemainderPos = StringUtils.indexOfAny(path, '?', '#');
    if (urlRemainderPos >= 0) {
      urlRemainder = path.substring(urlRemainderPos);
      path = path.substring(0, urlRemainderPos);
    }

    // apply namespace mangling (e.g. replace jcr: with _jcr_)
    path = mangleNamespaces(path);

    // add webapp context path
    if (request != null) {
      path = StringUtils.defaultString(request.getContextPath()) + path; //NOPMD
    }

    // url-encode path; spaces are emitted as %20 instead of +
    path = Escape.urlEncode(path);
    path = StringUtils.replace(path, "+", "%20");

    // replace %2F back to / for better readability
    path = StringUtils.replace(path, "%2F", "/");

    // build full URL again
    return path + (urlRemainder != null ? urlRemainder : "");
  }

  /**
   * Checks if the given URL is already externalized.
   * For this check some heuristics are applied: a URL is treated as externalized if it starts with a scheme
   * followed by a colon (e.g. http:, tel:, mailto:), with {@code //} or with {@code #}, and at least one further
   * character follows.
   * @param url URL
   * @return true if URL is already externalized.
   */
  public static boolean isExternalized(@NotNull String url) {
    return EXTERNALIZED_PATTERN.matcher(url).matches();
  }

  /**
   * Checks if the given URL can be externalized, that means it seems to be a content path that needs
   * externalization.
   * @param url URL
   * @return true if the URL starts with a slash and thus seems to be a path that needs externalization
   */
  public static boolean isExternalizable(@NotNull String url) {
    return StringUtils.startsWith(url, "/");
  }

  /**
   * Mangle the namespaces in the given path for usage in sling-based URLs.
   * <p>
   * Example: /path/jcr:content to /path/_jcr_content
   * </p>
   * @param path Path to mangle
   * @return Mangled path
   */
  public static @NotNull String mangleNamespaces(@NotNull String path) {
    if (!StringUtils.contains(path, NAMESPACE_SEPARATOR)) {
      return path;
    }
    Matcher matcher = NAMESPACE_PATTERN.matcher(path);
    StringBuffer sb = new StringBuffer();
    while (matcher.find()) {
      String replacement = MANGLED_NAMESPACE_PREFIX + matcher.group(1) + MANGLED_NAMESPACE_SUFFIX;
      // the namespace prefix is taken from the path and may contain '$' or '\', which appendReplacement would
      // interpret as group reference / escape character - quote it to insert it literally
      matcher.appendReplacement(sb, Matcher.quoteReplacement(replacement));
    }
    matcher.appendTail(sb);
    return sb.toString();
  }

}