View Javadoc
1   /*
2    * #%L
3    * wcm.io
4    * %%
5    * Copyright (C) 2014 wcm.io
6    * %%
7    * Licensed under the Apache License, Version 2.0 (the "License");
8    * you may not use this file except in compliance with the License.
9    * You may obtain a copy of the License at
10   *
11   *      http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   * #L%
19   */
20  package io.wcm.handler.url.impl;
21  
22  import java.net.URI;
23  import java.net.URISyntaxException;
24  import java.util.regex.Matcher;
25  import java.util.regex.Pattern;
26  
27  import org.apache.commons.lang3.StringUtils;
28  import org.apache.sling.api.SlingHttpServletRequest;
29  import org.apache.sling.api.resource.ResourceResolver;
30  import org.jetbrains.annotations.NotNull;
31  import org.jetbrains.annotations.Nullable;
32  
33  import io.wcm.sling.commons.util.Escape;
34  
35  /**
36   * Utility methods for externalizing URLs.
37   */
38  final class Externalizer {
39  
40    private Externalizer() {
41      // static util methods only
42    }
43  
44    /**
45     * Externalizes an URL by applying Sling Mapping. Hostname and scheme are not added because they are added by the
46     * link handler depending on site URL configuration and secure/non-secure mode. URLs that are already externalized
47     * remain untouched.
48     * @param url Unexternalized URL (without scheme or hostname)
49     * @param resolver Resource resolver
50     * @param request Request
51     * @return Exernalized URL without scheme or hostname, but with short URLs (if configured in Sling Mapping is
52     *         configured), and the path is URL-encoded if it contains special chars.
53     */
54    public static @Nullable String externalizeUrl(@NotNull String url, @NotNull ResourceResolver resolver, @Nullable SlingHttpServletRequest request) {
55      return externalizeUrlWithSlingMapping(url, resolver, request, false);
56    }
57  
58    /**
59     * Externalizes a URL by applying Sling Mapping. Hostname and scheme will be added. URLs that are already externalized
60     * remain untouched.
61     * @param url non-externalized URL (without scheme or hostname)
62     * @param resolver Resource resolver
63     * @param request Request
64     * @return Externalized URL with scheme or hostname, short URLs (if configured in Sling Mapping),
65     *         and the path is URL-encoded if it contains special chars.
66     */
67    public static @Nullable String externalizeUrlWithHost(@NotNull String url, @NotNull ResourceResolver resolver, @Nullable SlingHttpServletRequest request) {
68      return externalizeUrlWithSlingMapping(url, resolver, request, true);
69    }
70  
71    @SuppressWarnings("java:S112") // allow runtime exception
72    private static @Nullable String externalizeUrlWithSlingMapping(@NotNull String url, @NotNull ResourceResolver resolver,
73        @Nullable SlingHttpServletRequest request, boolean keepHost) {
74  
75      // apply externalization only path part
76      String path = url;
77  
78      // split off query string or fragment that may be appended to the URL
79      String urlRemainder = null;
80      int urlRemainderPos = StringUtils.indexOfAny(path, '?', '#');
81      if (urlRemainderPos >= 0) {
82        urlRemainder = path.substring(urlRemainderPos);
83        path = path.substring(0, urlRemainderPos);
84      }
85  
86      // apply reverse mapping based on current sling mapping configuration for current request
87      // e.g. to support a host-based prefix stripping mapping configuration configured at /etc/map
88  
89      // please note: the sling map method does a lot of things:
90      // 1. applies reverse mapping depending on the sling mapping configuration
91      // (this can even add a hostname if defined in sling mapping configuration)
92      // 2. applies namespace mangling (e.g. replace jcr: with _jcr_)
93      // 3. adds webapp context path if required
94      // 4. url-encodes the whole url
95      if (request != null) {
96        path = resolver.map(request, path);
97      }
98      else {
99        path = resolver.map(path);
100     }
101 
102     if (!keepHost) {
103       // remove scheme and hostname (probably added by sling mapping), but leave path in escaped form
104       try {
105         path = new URI(path).getRawPath();
106         // replace %2F back to / for better readability
107         path = StringUtils.replace(path, "%2F", "/");
108       } catch (URISyntaxException ex) {
109         throw new RuntimeException("Sling map method returned invalid URI: " + path, ex);
110       }
111     }
112 
113     // build full URL again
114     if (path == null) {
115       return null;
116     }
117     else {
118       return path + (urlRemainder != null ? urlRemainder : "");
119     }
120   }
121 
122   /**
123    * Externalizes an URL without applying Sling Mapping. Instead the servlet context path is added and sling namespace
124    * mangling is applied manually.
125    * Hostname and scheme are not added because they are added by the link handler depending on site URL configuration
126    * and secure/non-secure mode. URLs that are already externalized remain untouched.
127    * @param url Unexternalized URL (without scheme or hostname)
128    * @param request Request
129    * @return Exernalized URL without scheme or hostname, the path is URL-encoded if it contains special chars.
130    */
131   public static @NotNull String externalizeUrlWithoutMapping(@NotNull String url, @Nullable SlingHttpServletRequest request) {
132 
133     // apply externalization only path part
134     String path = url;
135 
136     // split off query string or fragment that may be appended to the URL
137     String urlRemainder = null;
138     int urlRemainderPos = StringUtils.indexOfAny(path, '?', '#');
139     if (urlRemainderPos >= 0) {
140       urlRemainder = path.substring(urlRemainderPos);
141       path = path.substring(0, urlRemainderPos);
142     }
143 
144     // apply namespace mangling (e.g. replace jcr: with _jcr_)
145     path = mangleNamespaces(path);
146 
147     // add webapp context path
148     if (request != null) {
149       path = StringUtils.defaultString(request.getContextPath()) + path; //NOPMD
150     }
151 
152     // url-encode path
153     path = Escape.urlEncode(path);
154     path = StringUtils.replace(path, "+", "%20");
155     // replace %2F back to / for better readability
156     path = StringUtils.replace(path, "%2F", "/");
157 
158     // build full URL again
159     return path + (urlRemainder != null ? urlRemainder : "");
160   }
161 
162   /*
163    * Detect as externalized:
164    * - everything staring with protocol and a colon is handled as externalized (http:, tel:, mailto:, javascript: etc.)
165    * - everything starting with // or # is handles as exteranlized
166    * - all other strings handles as not externalized
167    */
168   private static final Pattern EXTERNALIZED_PATTERN = Pattern.compile("^([^/]+:|//|#).+?");
169 
170   /**
171    * Checks if the given URL is already externalized.
172    * For this check some heuristics are applied.
173    * @param url URL
174    * @return true if path is already externalized.
175    */
176   public static boolean isExternalized(@NotNull String url) {
177     return EXTERNALIZED_PATTERN.matcher(url).matches();
178   }
179 
180   /**
181    * Checks if the given URL can be externalize, that means seems to be an content path that needs externalization.
182    * @param url URL
183    * @return true if url seems to be a path than needs externaliziation
184    */
185   public static boolean isExternalizable(@NotNull String url) {
186     return StringUtils.startsWith(url, "/");
187   }
188 
189   private static final String MANGLED_NAMESPACE_PREFIX = "/_";
190   private static final String MANGLED_NAMESPACE_SUFFIX = "_";
191   private static final char NAMESPACE_SEPARATOR = ':';
192   private static final Pattern NAMESPACE_PATTERN = Pattern.compile("/([^:/]+):");
193 
194   /**
195    * Mangle the namespaces in the given path for usage in sling-based URLs.
196    *
197    * <p>
198    * Example: /path/jcr:content to /path/_jcr_content
199    * </p>
200    *
201    * @param path Path to mangle
202    * @return Mangled path
203    */
204   public static @NotNull String mangleNamespaces(@NotNull String path) {
205     if (!StringUtils.contains(path, NAMESPACE_SEPARATOR)) {
206       return path;
207     }
208     Matcher matcher = NAMESPACE_PATTERN.matcher(path);
209     StringBuffer sb = new StringBuffer();
210     while (matcher.find()) {
211       String replacement = MANGLED_NAMESPACE_PREFIX + matcher.group(1) + MANGLED_NAMESPACE_SUFFIX;
212       matcher.appendReplacement(sb, replacement);
213     }
214     matcher.appendTail(sb);
215     return sb.toString();
216   }
217 
218 }