View Javadoc
1   /*
2    * #%L
3    * wcm.io
4    * %%
5    * Copyright (C) 2014 wcm.io
6    * %%
7    * Licensed under the Apache License, Version 2.0 (the "License");
8    * you may not use this file except in compliance with the License.
9    * You may obtain a copy of the License at
10   *
11   *      http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   * #L%
19   */
20  package io.wcm.handler.richtext;
21  
22  import java.lang.reflect.Array;
23  import java.net.URLDecoder;
24  import java.nio.charset.StandardCharsets;
25  import java.util.ArrayList;
26  import java.util.Collection;
27  import java.util.HashMap;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.Set;
32  
33  import org.apache.commons.lang3.StringUtils;
34  import org.apache.sling.api.SlingHttpServletRequest;
35  import org.apache.sling.api.adapter.Adaptable;
36  import org.apache.sling.api.resource.Resource;
37  import org.apache.sling.api.resource.ResourceResolver;
38  import org.apache.sling.api.resource.ValueMap;
39  import org.apache.sling.models.annotations.Model;
40  import org.apache.sling.models.annotations.injectorspecific.Self;
41  import org.apache.sling.models.annotations.injectorspecific.SlingObject;
42  import org.jdom2.Attribute;
43  import org.jdom2.Content;
44  import org.jdom2.Element;
45  import org.jdom2.Text;
46  import org.jetbrains.annotations.NotNull;
47  import org.jetbrains.annotations.Nullable;
48  import org.slf4j.Logger;
49  import org.slf4j.LoggerFactory;
50  
51  import com.day.cq.commons.jcr.JcrConstants;
52  import com.fasterxml.jackson.core.JsonProcessingException;
53  import com.fasterxml.jackson.core.type.TypeReference;
54  import com.fasterxml.jackson.databind.ObjectMapper;
55  
56  import io.wcm.handler.link.Link;
57  import io.wcm.handler.link.LinkHandler;
58  import io.wcm.handler.link.LinkNameConstants;
59  import io.wcm.handler.link.SyntheticLinkResource;
60  import io.wcm.handler.link.spi.LinkHandlerConfig;
61  import io.wcm.handler.link.spi.LinkType;
62  import io.wcm.handler.link.type.InternalLinkType;
63  import io.wcm.handler.link.type.MediaLinkType;
64  import io.wcm.handler.media.Media;
65  import io.wcm.handler.media.MediaHandler;
66  import io.wcm.handler.richtext.impl.DataPropertyUtil;
67  import io.wcm.handler.richtext.util.RewriteContentHandler;
68  import io.wcm.sling.commons.adapter.AdaptTo;
69  import io.wcm.wcm.commons.contenttype.FileExtension;
70  
71  /**
72   * Default implementation of {@link RewriteContentHandler}.
73   */
74  @Model(adaptables = { SlingHttpServletRequest.class, Resource.class })
75  public final class DefaultRewriteContentHandler implements RewriteContentHandler {
76  
  // Object this model was adapted from; used to adapt to link types and to derive the current resource
  @Self
  private Adaptable adaptable;
  // Resource resolver used when creating the synthetic link resource
  @SlingObject
  private ResourceResolver resourceResolver;
  // Resolves the link metadata collected from anchor elements
  @Self
  private LinkHandler linkHandler;
  // Supplies the list of link types checked when detecting legacy link metadata
  @Self
  private LinkHandlerConfig linkHandlerConfig;
  // Resolves the media reference taken from the src attribute of image elements
  @Self
  private MediaHandler mediaHandler;

  private static final Logger log = LoggerFactory.getLogger(DefaultRewriteContentHandler.class);
  // Shared JSON mapper for parsing link metadata stored as JSON in element attributes
  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
  // Target type used when a JSON object is parsed into a property map
  private static final TypeReference<HashMap<String, Object>> MAP_TYPE_REFERENCE = new TypeReference<HashMap<String, Object>>() {
    // type reference
  };

  /**
   * List of all tag names that should not be rendered "self-closing" to avoid interpretation errors in browsers.
   * All names are lower case, lookups have to lower-case the element name first.
   */
  private static final Set<String> NONSELFCLOSING_TAGS = Set.of(
      "div",
      "span",
      "strong",
      "em",
      "b",
      "i",
      "ul",
      "ol",
      "li"
      );
108 
109   /**
110    * Checks if the given element has to be rewritten.
111    * Is called for every child single element of the parent given to rewriteContent method.
112    * @param element Element to check
113    * @return null if nothing is to do with this element.
114    *         Return empty list to remove this element.
115    *         Return list with other content to replace element with new content.
116    */
117   @Override
118   @SuppressWarnings({ "PMD.ReturnEmptyCollectionRatherThanNull", "java:S1168" })
119   public @Nullable List<Content> rewriteElement(@NotNull Element element) {
120 
121     // rewrite anchor elements
122     if (StringUtils.equalsIgnoreCase(element.getName(), "a")) {
123       return rewriteAnchor(element);
124     }
125 
126     // rewrite image elements
127     else if (StringUtils.equalsIgnoreCase(element.getName(), "img")) {
128       return rewriteImage(element);
129     }
130 
131     // detect BR elements and turn those into "self-closing" elements
132     // since the otherwise generated <br> </br> structures are illegal and
133     // are not handled correctly by Internet Explorers
134     else if (StringUtils.equalsIgnoreCase(element.getName(), "br")) {
135       if (!element.getContent().isEmpty()) {
136         element.removeContent();
137       }
138       return null;
139     }
140 
141     // detect empty elements and insert at least an empty string to avoid "self-closing" elements
142     // that are not handled correctly by most browsers
143     else if (NONSELFCLOSING_TAGS.contains(StringUtils.lowerCase(element.getName()))) {
144       if (element.getContent().isEmpty()) {
145         element.setText("");
146       }
147       return null;
148     }
149 
150     return null;
151   }
152 
153   /**
154    * Checks if the given anchor element has to be rewritten.
155    * @param element Element to check
156    * @return null if nothing is to do with this element.
157    *         Return empty list to remove this element.
158    *         Return list with other content to replace element with new content.
159    */
160   private List<Content> rewriteAnchor(@NotNull Element element) {
161 
162     // detect empty anchor elements and insert at least an empty string to avoid "self-closing" elements
163     // that are not handled correctly by most browsers
164     if (element.getContent().isEmpty()) {
165       element.setText("");
166     }
167 
168     // resolve link metadata from DOM element
169     Link link = getAnchorLink(element);
170 
171     // build anchor for link metadata
172     Element anchorElement = buildAnchorElement(link, element);
173 
174     // Replace anchor tag or remove anchor tag if invalid - add any sub-content in every case
175     List<Content> content = new ArrayList<>();
176     if (anchorElement != null) {
177       anchorElement.addContent(element.cloneContent());
178       content.add(anchorElement);
179     }
180     else {
181       content.addAll(element.getContent());
182     }
183     return content;
184   }
185 
186   /**
187    * Extracts link metadata from the DOM elements attributes and resolves them to a {@link Link} object.
188    * @param element DOM element
189    * @return Link metadata
190    */
191   private Link getAnchorLink(Element element) {
192     Resource currentResource = getCurrentResource();
193     if (currentResource == null) {
194       return linkHandler.invalid();
195     }
196 
197     SyntheticLinkResource resource = new SyntheticLinkResource(resourceResolver,
198         currentResource.getPath() + "/$link$");
199     ValueMap resourceProps = resource.getValueMap();
200 
201     // get link metadata from data element
202     boolean foundMetadata = getAnchorMetadataFromData(resourceProps, element);
203     if (!foundMetadata) {
204       // support for legacy metadata stored in single "data" attribute
205       foundMetadata = getAnchorLegacyMetadataFromSingleData(resourceProps, element);
206       if (!foundMetadata) {
207         // support for legacy metadata stored in rel attribute
208         getAnchorLegacyMetadataFromRel(resourceProps, element);
209       }
210     }
211 
212     // build anchor via linkhandler
213     return linkHandler.get(resource).build();
214   }
215 
216   /**
217    * Builds anchor element for given link metadata.
218    * @param link Link metadata
219    * @param element Original element
220    * @return Anchor element or null if link is invalid
221    */
222   private Element buildAnchorElement(Link link, Element element) {
223     if (link.isValid()) {
224       return link.getAnchor();
225     }
226     else if ((element.getAttributeValue("id") != null || element.getAttributeValue("name") != null) && element.getAttributeValue("src") == null) {
227       // not a valid link, but it seems to be a named anchor - keep it
228       // support both id attribute (valid in HTML4+HTML5) and the name attribute (only valid in HTML4)
229       return element;
230     }
231     else {
232       return null;
233     }
234   }
235 
236   /**
237    * Support data structures where link metadata is stored in mutliple HTML5 data-* attributes.
238    * @param resourceProps ValueMap to write link metadata to
239    * @param element Link element
240    * @return true if any metadata attribute was found
241    */
242   @SuppressWarnings("java:S3776") // ignore complexity
243   private boolean getAnchorMetadataFromData(ValueMap resourceProps, Element element) {
244     boolean foundAny = false;
245 
246     List<Attribute> attributes = element.getAttributes();
247     for (Attribute attribute : attributes) {
248       if (DataPropertyUtil.isHtml5DataName(attribute.getName())) {
249         String value = attribute.getValue();
250         if (StringUtils.isNotEmpty(value)) {
251           String property = DataPropertyUtil.toHeadlessCamelCaseName(attribute.getName());
252           if (StringUtils.startsWith(value, "[") && StringUtils.endsWith(value, "]")) {
253             try {
254               String[] values = OBJECT_MAPPER.readValue(value, String[].class);
255               resourceProps.put(property, values);
256             }
257             catch (JsonProcessingException ex) {
258               log.debug("Unable to parse JSON array: {}", value, ex);
259             }
260           }
261           else {
262             resourceProps.put(property, value);
263           }
264           foundAny = true;
265         }
266       }
267     }
268 
269     return foundAny;
270   }
271 
272   /**
273    * Support legacy data structures where link metadata is stored as JSON fragment in single HTML5 data attribute.
274    * @param resourceProps ValueMap to write link metadata to
275    * @param element Link element
276    */
277   private boolean getAnchorLegacyMetadataFromSingleData(ValueMap resourceProps, Element element) {
278     boolean foundAny = false;
279 
280     Map<String, Object> metadata = null;
281     Attribute dataAttribute = element.getAttribute("data");
282     if (dataAttribute != null) {
283       String metadataString = dataAttribute.getValue();
284       if (StringUtils.isNotEmpty(metadataString)) {
285         try {
286           metadata = OBJECT_MAPPER.readValue(metadataString, MAP_TYPE_REFERENCE);
287         }
288         catch (JsonProcessingException ex) {
289           log.debug("Invalid link metadata: {}", metadataString, ex);
290         }
291       }
292     }
293     if (metadata != null) {
294       Iterator<Map.Entry<String, Object>> entries = metadata.entrySet().iterator();
295       while (entries.hasNext()) {
296         Map.Entry<String, Object> entry = entries.next();
297         resourceProps.put(entry.getKey(), entry.getValue());
298         foundAny = true;
299       }
300     }
301 
302     return foundAny;
303   }
304 
305   /**
306    * Support legacy data structures where link metadata is stored as JSON fragment in rel attribute.
307    * @param resourceProps ValueMap to write link metadata to
308    * @param element Link element
309    */
310   @SuppressWarnings({ "java:S6541", "java:S3776", "java:S135" }) // ignore complexity
311   private void getAnchorLegacyMetadataFromRel(ValueMap resourceProps, Element element) {
312     // Check href attribute - do not change elements with no href or links to anchor names
313     String href = element.getAttributeValue("href");
314     String linkWindowTarget = element.getAttributeValue("target");
315     if (href == null || href.startsWith("#")) {
316       return;
317     }
318 
319     // get link metadata from rel element
320     Map<String, Object> metadata = null;
321     String metadataString = element.getAttributeValue("rel");
322     if (StringUtils.isNotEmpty(metadataString)) {
323       try {
324         metadata = OBJECT_MAPPER.readValue(metadataString, MAP_TYPE_REFERENCE);
325       }
326       catch (JsonProcessingException ex) {
327         log.debug("Invalid link metadata: {}", metadataString, ex);
328       }
329     }
330     if (metadata == null) {
331       metadata = new HashMap<>();
332     }
333 
334     // transform link metadata to virtual JCR resource with JCR properties
335     Iterator<Map.Entry<String, Object>> entries = metadata.entrySet().iterator();
336     while (entries.hasNext()) {
337       Map.Entry<String, Object> entry = entries.next();
338       String metadataPropertyName = entry.getKey();
339       Object value = entry.getValue();
340 
341       // check if value is collection
342       if (value != null) {
343         if (value instanceof Collection) {
344           resourceProps.put(metadataPropertyName, ((Collection)value).toArray());
345         }
346         // check if value is array
347         else if (value.getClass().isArray()) {
348           // store array values
349           List<String> values = new ArrayList<>();
350           int arrayLength = Array.getLength(value);
351           for (int j = 0; j < arrayLength; j++) {
352             Object arrayItem = Array.get(value, j);
353             if (arrayItem != null) {
354               values.add(arrayItem.toString());
355             }
356           }
357           resourceProps.put(metadataPropertyName, values.toArray());
358         }
359         else {
360           // store simple value
361           resourceProps.put(metadataPropertyName, value);
362         }
363       }
364     }
365 
366     // detect link type
367     LinkType linkType = null;
368     String linkTypeString = resourceProps.get(LinkNameConstants.PN_LINK_TYPE, String.class);
369     for (Class<? extends LinkType> candidateClass : linkHandlerConfig.getLinkTypes()) {
370       LinkType candidate = AdaptTo.notNull(adaptable, candidateClass);
371       if (StringUtils.isNotEmpty(linkTypeString)) {
372         if (StringUtils.equals(linkTypeString, candidate.getId())) {
373           linkType = candidate;
374           break;
375         }
376       }
377       else if (candidate.accepts(href)) {
378         linkType = candidate;
379         break;
380       }
381     }
382     if (linkType == null) {
383       // skip further processing if link type was not detected
384       return;
385     }
386 
387     // workaround: strip off ".html" extension if it was added automatically by the RTE
388     if (linkType instanceof InternalLinkType || linkType instanceof MediaLinkType) {
389       String htmlSuffix = "." + FileExtension.HTML;
390       if (StringUtils.endsWith(href, htmlSuffix)) {
391         href = StringUtils.substringBeforeLast(href, htmlSuffix);
392       }
393     }
394 
395     // store link reference (property depending on link type)
396     resourceProps.put(linkType.getPrimaryLinkRefProperty(), href);
397     resourceProps.put(LinkNameConstants.PN_LINK_WINDOW_TARGET, linkWindowTarget);
398 
399   }
400 
401   /**
402    * Checks if the given image element has to be rewritten.
403    * @param element Element to check
404    * @return null if nothing is to do with this element.
405    *         Return empty list to remove this element.
406    *         Return list with other content to replace element with new content.
407    */
408   private List<Content> rewriteImage(@NotNull Element element) {
409 
410     // resolve media metadata from DOM element
411     Media media = getImageMedia(element);
412 
413     // build image for media metadata
414     Element imageElement = buildImageElement(media, element);
415 
416     // return modified element
417     List<Content> content = new ArrayList<>();
418     content.add(imageElement);
419     return content;
420   }
421 
422   /**
423    * Extracts media metadata from the DOM element attributes and resolves them to a {@link Media} object.
424    * @param element DOM element
425    * @return Media metadata
426    */
427   private Media getImageMedia(@NotNull Element element) {
428     String ref = element.getAttributeValue("src");
429     if (StringUtils.isNotEmpty(ref)) {
430       ref = unexternalizeImageRef(ref);
431     }
432     return mediaHandler.get(ref).build();
433   }
434 
435   /**
436    * Builds image element for given media metadata.
437    * @param media Media metadata
438    * @param element Original element
439    * @return Image element or null if media reference is invalid
440    */
441   private Element buildImageElement(@NotNull Media media, @NotNull Element element) {
442     if (media.isValid()) {
443       element.setAttribute("src", media.getUrl());
444     }
445     return element;
446   }
447 
448   /**
449    * Converts the RTE externalized form of media reference to internal form.
450    * @param ref Externalize media reference
451    * @return Internal media reference
452    */
453   private String unexternalizeImageRef(String ref) {
454     String unexternalizedRef = ref;
455 
456     if (StringUtils.isNotEmpty(unexternalizedRef)) {
457 
458       // decode if required
459       unexternalizedRef = decodeIfEncoded(unexternalizedRef);
460 
461       // remove default servlet extension that is needed for inline images in RTE
462       // note: implementation might not fit for all MediaSource implementations!
463       unexternalizedRef = StringUtils.removeEnd(unexternalizedRef, "/" + JcrConstants.JCR_CONTENT + ".default");
464       unexternalizedRef = StringUtils.removeEnd(unexternalizedRef, "/_jcr_content.default");
465     }
466 
467     return unexternalizedRef;
468   }
469 
470   /**
471    * URL-decode value if required.
472    * @param value Probably encoded value.
473    * @return Decoded value
474    */
475   private String decodeIfEncoded(String value) {
476     if (StringUtils.contains(value, "%")) {
477       return URLDecoder.decode(value, StandardCharsets.UTF_8);
478     }
479     return value;
480   }
481 
  /**
   * Text nodes are not rewritten by this handler.
   * @param text Text node to check
   * @return always null
   */
  @Override
  @SuppressWarnings({ "PMD.ReturnEmptyCollectionRatherThanNull", "java:S1168" })
  public @Nullable List<Content> rewriteText(@NotNull Text text) {
    // nothing to do with text element
    return null;
  }
488 
489   private @Nullable Resource getCurrentResource() {
490     if (adaptable instanceof Resource) {
491       return (Resource)adaptable;
492     }
493     if (adaptable instanceof SlingHttpServletRequest) {
494       return ((SlingHttpServletRequest)adaptable).getResource();
495     }
496     return null;
497   }
498 
499 }