1 /*
2 * #%L
3 * wcm.io
4 * %%
5 * Copyright (C) 2014 wcm.io
6 * %%
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 * #L%
19 */
20 package io.wcm.handler.url.impl;
21
22 import java.net.URI;
23 import java.net.URISyntaxException;
24 import java.util.regex.Matcher;
25 import java.util.regex.Pattern;
26
27 import org.apache.commons.lang3.StringUtils;
28 import org.apache.sling.api.SlingHttpServletRequest;
29 import org.apache.sling.api.resource.ResourceResolver;
30 import org.jetbrains.annotations.NotNull;
31 import org.jetbrains.annotations.Nullable;
32
33 import io.wcm.sling.commons.util.Escape;
34
35 /**
36 * Utility methods for externalizing URLs.
37 */
38 final class Externalizer {
39
40 private Externalizer() {
41 // static util methods only
42 }
43
44 /**
45 * Externalizes an URL by applying Sling Mapping. Hostname and scheme are not added because they are added by the
46 * link handler depending on site URL configuration and secure/non-secure mode. URLs that are already externalized
47 * remain untouched.
48 * @param url Unexternalized URL (without scheme or hostname)
49 * @param resolver Resource resolver
50 * @param request Request
51 * @return Exernalized URL without scheme or hostname, but with short URLs (if configured in Sling Mapping is
52 * configured), and the path is URL-encoded if it contains special chars.
53 */
54 public static @Nullable String externalizeUrl(@NotNull String url, @NotNull ResourceResolver resolver, @Nullable SlingHttpServletRequest request) {
55 return externalizeUrlWithSlingMapping(url, resolver, request, false);
56 }
57
58 /**
59 * Externalizes a URL by applying Sling Mapping. Hostname and scheme will be added. URLs that are already externalized
60 * remain untouched.
61 * @param url non-externalized URL (without scheme or hostname)
62 * @param resolver Resource resolver
63 * @param request Request
64 * @return Externalized URL with scheme or hostname, short URLs (if configured in Sling Mapping),
65 * and the path is URL-encoded if it contains special chars.
66 */
67 public static @Nullable String externalizeUrlWithHost(@NotNull String url, @NotNull ResourceResolver resolver, @Nullable SlingHttpServletRequest request) {
68 return externalizeUrlWithSlingMapping(url, resolver, request, true);
69 }
70
71 @SuppressWarnings("java:S112") // allow runtime exception
72 private static @Nullable String externalizeUrlWithSlingMapping(@NotNull String url, @NotNull ResourceResolver resolver,
73 @Nullable SlingHttpServletRequest request, boolean keepHost) {
74
75 // apply externalization only path part
76 String path = url;
77
78 // split off query string or fragment that may be appended to the URL
79 String urlRemainder = null;
80 int urlRemainderPos = StringUtils.indexOfAny(path, '?', '#');
81 if (urlRemainderPos >= 0) {
82 urlRemainder = path.substring(urlRemainderPos);
83 path = path.substring(0, urlRemainderPos);
84 }
85
86 // apply reverse mapping based on current sling mapping configuration for current request
87 // e.g. to support a host-based prefix stripping mapping configuration configured at /etc/map
88
89 // please note: the sling map method does a lot of things:
90 // 1. applies reverse mapping depending on the sling mapping configuration
91 // (this can even add a hostname if defined in sling mapping configuration)
92 // 2. applies namespace mangling (e.g. replace jcr: with _jcr_)
93 // 3. adds webapp context path if required
94 // 4. url-encodes the whole url
95 if (request != null) {
96 path = resolver.map(request, path);
97 }
98 else {
99 path = resolver.map(path);
100 }
101
102 if (!keepHost) {
103 // remove scheme and hostname (probably added by sling mapping), but leave path in escaped form
104 try {
105 path = new URI(path).getRawPath();
106 // replace %2F back to / for better readability
107 path = StringUtils.replace(path, "%2F", "/");
108 } catch (URISyntaxException ex) {
109 throw new RuntimeException("Sling map method returned invalid URI: " + path, ex);
110 }
111 }
112
113 // build full URL again
114 if (path == null) {
115 return null;
116 }
117 else {
118 return path + (urlRemainder != null ? urlRemainder : "");
119 }
120 }
121
122 /**
123 * Externalizes an URL without applying Sling Mapping. Instead the servlet context path is added and sling namespace
124 * mangling is applied manually.
125 * Hostname and scheme are not added because they are added by the link handler depending on site URL configuration
126 * and secure/non-secure mode. URLs that are already externalized remain untouched.
127 * @param url Unexternalized URL (without scheme or hostname)
128 * @param request Request
129 * @return Exernalized URL without scheme or hostname, the path is URL-encoded if it contains special chars.
130 */
131 public static @NotNull String externalizeUrlWithoutMapping(@NotNull String url, @Nullable SlingHttpServletRequest request) {
132
133 // apply externalization only path part
134 String path = url;
135
136 // split off query string or fragment that may be appended to the URL
137 String urlRemainder = null;
138 int urlRemainderPos = StringUtils.indexOfAny(path, '?', '#');
139 if (urlRemainderPos >= 0) {
140 urlRemainder = path.substring(urlRemainderPos);
141 path = path.substring(0, urlRemainderPos);
142 }
143
144 // apply namespace mangling (e.g. replace jcr: with _jcr_)
145 path = mangleNamespaces(path);
146
147 // add webapp context path
148 if (request != null) {
149 path = StringUtils.defaultString(request.getContextPath()) + path; //NOPMD
150 }
151
152 // url-encode path
153 path = Escape.urlEncode(path);
154 path = StringUtils.replace(path, "+", "%20");
155 // replace %2F back to / for better readability
156 path = StringUtils.replace(path, "%2F", "/");
157
158 // build full URL again
159 return path + (urlRemainder != null ? urlRemainder : "");
160 }
161
162 /*
163 * Detect as externalized:
164 * - everything staring with protocol and a colon is handled as externalized (http:, tel:, mailto:, javascript: etc.)
165 * - everything starting with // or # is handles as exteranlized
166 * - all other strings handles as not externalized
167 */
168 private static final Pattern EXTERNALIZED_PATTERN = Pattern.compile("^([^/]+:|//|#).+?");
169
170 /**
171 * Checks if the given URL is already externalized.
172 * For this check some heuristics are applied.
173 * @param url URL
174 * @return true if path is already externalized.
175 */
176 public static boolean isExternalized(@NotNull String url) {
177 return EXTERNALIZED_PATTERN.matcher(url).matches();
178 }
179
180 /**
181 * Checks if the given URL can be externalize, that means seems to be an content path that needs externalization.
182 * @param url URL
183 * @return true if url seems to be a path than needs externaliziation
184 */
185 public static boolean isExternalizable(@NotNull String url) {
186 return StringUtils.startsWith(url, "/");
187 }
188
189 private static final String MANGLED_NAMESPACE_PREFIX = "/_";
190 private static final String MANGLED_NAMESPACE_SUFFIX = "_";
191 private static final char NAMESPACE_SEPARATOR = ':';
192 private static final Pattern NAMESPACE_PATTERN = Pattern.compile("/([^:/]+):");
193
194 /**
195 * Mangle the namespaces in the given path for usage in sling-based URLs.
196 *
197 * <p>
198 * Example: /path/jcr:content to /path/_jcr_content
199 * </p>
200 *
201 * @param path Path to mangle
202 * @return Mangled path
203 */
204 public static @NotNull String mangleNamespaces(@NotNull String path) {
205 if (!StringUtils.contains(path, NAMESPACE_SEPARATOR)) {
206 return path;
207 }
208 Matcher matcher = NAMESPACE_PATTERN.matcher(path);
209 StringBuffer sb = new StringBuffer();
210 while (matcher.find()) {
211 String replacement = MANGLED_NAMESPACE_PREFIX + matcher.group(1) + MANGLED_NAMESPACE_SUFFIX;
212 matcher.appendReplacement(sb, replacement);
213 }
214 matcher.appendTail(sb);
215 return sb.toString();
216 }
217
218 }