1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package io.wcm.tooling.commons.packmgr.unpack;
21
22 import static org.apache.jackrabbit.vault.util.Constants.DOT_CONTENT_XML;
23 import static org.apache.jackrabbit.vault.util.Constants.ROOT_DIR;
24
25 import java.io.File;
26 import java.io.FileOutputStream;
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.io.OutputStream;
30 import java.util.ArrayList;
31 import java.util.Calendar;
32 import java.util.Enumeration;
33 import java.util.HashSet;
34 import java.util.LinkedHashSet;
35 import java.util.List;
36 import java.util.Set;
37 import java.util.TreeSet;
38 import java.util.concurrent.atomic.AtomicBoolean;
39 import java.util.regex.Matcher;
40 import java.util.regex.Pattern;
41 import java.util.regex.PatternSyntaxException;
42
43 import javax.jcr.PropertyType;
44 import javax.xml.XMLConstants;
45 import javax.xml.parsers.ParserConfigurationException;
46 import javax.xml.parsers.SAXParser;
47 import javax.xml.parsers.SAXParserFactory;
48
49 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
50 import org.apache.commons.compress.archivers.zip.ZipFile;
51 import org.apache.commons.io.FileUtils;
52 import org.apache.commons.io.FilenameUtils;
53 import org.apache.commons.io.IOUtils;
54 import org.apache.commons.lang3.StringUtils;
55 import org.apache.jackrabbit.JcrConstants;
56 import org.apache.jackrabbit.util.ISO8601;
57 import org.apache.jackrabbit.vault.fs.io.DocViewFormat;
58 import org.apache.jackrabbit.vault.util.PlatformNameFormat;
59 import org.jdom2.Attribute;
60 import org.jdom2.Document;
61 import org.jdom2.Element;
62 import org.jdom2.JDOMException;
63 import org.jdom2.Namespace;
64 import org.jdom2.input.SAXBuilder;
65 import org.jdom2.output.Format;
66 import org.jdom2.output.LineSeparator;
67 import org.jdom2.output.XMLOutputter;
68 import org.xml.sax.Attributes;
69 import org.xml.sax.SAXException;
70 import org.xml.sax.helpers.DefaultHandler;
71
72 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
73 import io.wcm.tooling.commons.packmgr.PackageManagerException;
74
75
76
77
78 public final class ContentUnpacker {
79
80 private static final String MIXINS_PROPERTY = "jcr:mixinTypes";
81 private static final String PRIMARYTYPE_PROPERTY = "jcr:primaryType";
82 private static final Namespace JCR_NAMESPACE = Namespace.getNamespace("jcr", "http://www.jcp.org/jcr/1.0");
83 private static final Namespace CQ_NAMESPACE = Namespace.getNamespace("cq", "http://www.day.com/jcr/cq/1.0");
84 private static final Pattern FILENAME_NAMESPACE_PATTERN = Pattern.compile("^([^:]+):(.+)$");
85
86 private static final SAXParserFactory SAX_PARSER_FACTORY;
87 static {
88 SAX_PARSER_FACTORY = SAXParserFactory.newInstance();
89 SAX_PARSER_FACTORY.setNamespaceAware(true);
90 }
91
92 private static final DocViewFormat DOCVIEWFORMAT = new DocViewFormat();
93
94 private final Pattern[] excludeFiles;
95 private final Pattern[] excludeNodes;
96 private final Pattern[] excludeProperties;
97 private final Pattern[] excludeMixins;
98 private final boolean markReplicationActivated;
99 private final Pattern[] markReplicationActivatedIncludeNodes;
100 private final String dateLastReplicated;
101
102
103
104
105 public ContentUnpacker(ContentUnpackerProperties properties) {
106 this.excludeFiles = toPatternArray(properties.getExcludeFiles());
107 this.excludeNodes = toPatternArray(properties.getExcludeNodes());
108 this.excludeProperties = toPatternArray(properties.getExcludeProperties());
109 this.excludeMixins = toPatternArray(properties.getExcludeMixins());
110 this.markReplicationActivated = properties.isMarkReplicationActivated();
111 this.markReplicationActivatedIncludeNodes = toPatternArray(properties.getMarkReplicationActivatedIncludeNodes());
112
113 if (StringUtils.isNotBlank(properties.getDateLastReplicated())) {
114 this.dateLastReplicated = properties.getDateLastReplicated();
115 }
116 else {
117
118 Calendar cal = Calendar.getInstance();
119 cal.set(Calendar.HOUR_OF_DAY, 0);
120 cal.set(Calendar.MINUTE, 0);
121 cal.set(Calendar.SECOND, 0);
122 cal.set(Calendar.MILLISECOND, 0);
123 this.dateLastReplicated = ISO8601.format(cal);
124 }
125 }
126
127 private static Pattern[] toPatternArray(String[] patternStrings) {
128 if (patternStrings == null) {
129 return new Pattern[0];
130 }
131 Pattern[] patterns = new Pattern[patternStrings.length];
132 for (int i = 0; i < patternStrings.length; i++) {
133 try {
134 patterns[i] = Pattern.compile(patternStrings[i]);
135 }
136 catch (PatternSyntaxException ex) {
137 throw new PackageManagerException("Invalid regexp pattern: " + patternStrings[i], ex);
138 }
139 }
140 return patterns;
141 }
142
143 private static boolean matches(String name, Pattern[] patterns, boolean defaultIfNotPatternsDefined) {
144 if (patterns.length == 0) {
145 return defaultIfNotPatternsDefined;
146 }
147 for (Pattern pattern : patterns) {
148 if (pattern.matcher(name).matches()) {
149 return true;
150 }
151 }
152 return false;
153 }
154
155 private boolean applyXmlExcludes(String name) {
156 if (this.excludeNodes.length == 0 && this.excludeProperties.length == 0) {
157 return false;
158 }
159 return isJcrContentXmlFile(name);
160 }
161
162 private boolean isJcrContentXmlFile(String name) {
163 return StringUtils.equalsIgnoreCase(FilenameUtils.getExtension(name), "xml")
164 && StringUtils.startsWith(name, "jcr_root/");
165 }
166
167
168
169
170
171
172 public void unpack(File file, File outputDirectory) {
173 try (ZipFile zipFile = new ZipFile.Builder().setFile(file).get()) {
174 Enumeration<ZipArchiveEntry> entries = zipFile.getEntries();
175 while (entries.hasMoreElements()) {
176 ZipArchiveEntry entry = entries.nextElement();
177 if (!matches(entry.getName(), excludeFiles, false)) {
178 unpackEntry(zipFile, entry, outputDirectory);
179 }
180 }
181 }
182 catch (IOException ex) {
183 throw new PackageManagerException("Error reading content package " + file.getAbsolutePath(), ex);
184 }
185 }
186
187 @SuppressFBWarnings("RV_RETURN_VALUE_IGNORED_BAD_PRACTICE")
188 private void unpackEntry(ZipFile zipFile, ZipArchiveEntry entry, File outputDirectory) throws IOException {
189 if (entry.isDirectory()) {
190 File directory = FileUtils.getFile(outputDirectory, entry.getName());
191 directory.mkdirs();
192 }
193 else {
194 Set<String> namespacePrefixes = null;
195 if (applyXmlExcludes(entry.getName())) {
196 namespacePrefixes = getNamespacePrefixes(zipFile, entry);
197 }
198
199 try (InputStream entryStream = zipFile.getInputStream(entry)) {
200 File outputFile = FileUtils.getFile(outputDirectory, entry.getName());
201 if (outputFile.exists()) {
202 outputFile.delete();
203 }
204 File directory = outputFile.getParentFile();
205 directory.mkdirs();
206
207 try (FileOutputStream fos = new FileOutputStream(outputFile)) {
208 if (applyXmlExcludes(entry.getName()) && namespacePrefixes != null) {
209
210 try {
211 writeXmlWithExcludes(entry, entryStream, fos, namespacePrefixes);
212 }
213 catch (JDOMException ex) {
214 throw new PackageManagerException("Unable to parse XML file: " + entry.getName(), ex);
215 }
216 }
217 else {
218
219 IOUtils.copy(entryStream, fos);
220 }
221 }
222 if (isJcrContentXmlFile(entry.getName())) {
223
224 try {
225 DOCVIEWFORMAT.format(outputFile, false);
226 }
227 catch (IOException ex) {
228 throw new IOException("Unable to apply DocView format to file: " + outputFile.getAbsolutePath(), ex);
229 }
230 }
231 }
232 }
233 }
234
235
236
237
238
239
240
241
242
243 private Set<String> getNamespacePrefixes(ZipFile zipFile, ZipArchiveEntry entry) throws IOException {
244 try (InputStream entryStream = zipFile.getInputStream(entry)) {
245 SAXParser parser = SAX_PARSER_FACTORY.newSAXParser();
246 final Set<String> prefixes = new LinkedHashSet<>();
247
248 final AtomicBoolean foundRootElement = new AtomicBoolean(false);
249 DefaultHandler handler = new DefaultHandler() {
250 @Override
251 public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
252
253 if (StringUtils.equals(uri, JCR_NAMESPACE.getURI()) && StringUtils.equals(localName, "root")) {
254 foundRootElement.set(true);
255 }
256 }
257 @Override
258 public void startPrefixMapping(String prefix, String uri) throws SAXException {
259 if (StringUtils.isNotBlank(prefix)) {
260 prefixes.add(prefix);
261 }
262 }
263 };
264 parser.parse(entryStream, handler);
265
266 if (!foundRootElement.get()) {
267 return null;
268 }
269 else {
270 return prefixes;
271 }
272 }
273 catch (IOException | SAXException | ParserConfigurationException ex) {
274 throw new IOException("Error parsing " + entry.getName(), ex);
275 }
276 }
277
278 private void writeXmlWithExcludes(ZipArchiveEntry entry, InputStream inputStream, OutputStream outputStream, Set<String> namespacePrefixes)
279 throws IOException, JDOMException {
280 SAXBuilder saxBuilder = new SAXBuilder();
281 saxBuilder.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
282 saxBuilder.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
283 Document doc = saxBuilder.build(inputStream);
284
285 Set<String> namespacePrefixesActuallyUsed = new HashSet<>();
286
287
288 String namespacePrefix = getNamespacePrefix(entry.getName());
289 if (namespacePrefix != null) {
290 namespacePrefixesActuallyUsed.add(namespacePrefix);
291 }
292
293 applyXmlExcludes(doc.getRootElement(), getParentPath(entry), namespacePrefixesActuallyUsed, false);
294
295 XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat()
296 .setIndent(" ")
297 .setLineSeparator(LineSeparator.UNIX));
298 outputter.setXMLOutputProcessor(new NamspaceOrderedXmlProcessor(namespacePrefixes, namespacePrefixesActuallyUsed));
299 outputter.output(doc, outputStream);
300 outputStream.flush();
301 }
302
303 static String getNamespacePrefix(String path) {
304 String fileName = FilenameUtils.getName(path);
305 if (StringUtils.equals(DOT_CONTENT_XML, fileName)) {
306 String parentFolderName = FilenameUtils.getName(FilenameUtils.getPathNoEndSeparator(path));
307 if (parentFolderName != null) {
308 String nodeName = PlatformNameFormat.getRepositoryName(parentFolderName);
309 Matcher matcher = FILENAME_NAMESPACE_PATTERN.matcher(nodeName);
310 if (matcher.matches()) {
311 return matcher.group(1);
312 }
313 }
314 }
315 return null;
316 }
317
318 private String getParentPath(ZipArchiveEntry entry) {
319 return StringUtils.removeEnd(StringUtils.removeStart(entry.getName(), ROOT_DIR), "/" + DOT_CONTENT_XML);
320 }
321
322 private String buildElementPath(Element element, String parentPath) {
323 StringBuilder path = new StringBuilder(parentPath);
324 if (!StringUtils.equals(element.getQualifiedName(), "jcr:root")) {
325 path.append("/").append(element.getQualifiedName());
326 }
327 return path.toString();
328 }
329
330 @SuppressWarnings("PMD.EmptyControlStatement")
331 private void applyXmlExcludes(Element element, String parentPath, Set<String> namespacePrefixesActuallyUsed,
332 boolean insideReplicationElement) {
333 String path = buildElementPath(element, parentPath);
334 if (matches(path, this.excludeNodes, false)) {
335 element.detach();
336 return;
337 }
338 collectNamespacePrefix(namespacePrefixesActuallyUsed, element.getNamespacePrefix());
339
340 String jcrPrimaryType = element.getAttributeValue("primaryType", JCR_NAMESPACE);
341 boolean isRepositoryUserGroup = StringUtils.equals(jcrPrimaryType, "rep:User") || StringUtils.equals(jcrPrimaryType, "rep:Group");
342 boolean isReplicationElement = StringUtils.equals(jcrPrimaryType, "cq:Page")
343 || StringUtils.equals(jcrPrimaryType, "dam:Asset")
344 || StringUtils.equals(jcrPrimaryType, "cq:Template");
345 boolean isContent = insideReplicationElement && StringUtils.equals(element.getQualifiedName(), "jcr:content");
346 boolean setReplicationAttributes = isContent && markReplicationActivated;
347
348 List<Attribute> attributes = new ArrayList<>(element.getAttributes());
349 for (Attribute attribute : attributes) {
350 boolean excluded = false;
351 if (matches(attribute.getQualifiedName(), this.excludeProperties, false)) {
352 if (isRepositoryUserGroup && StringUtils.equals(attribute.getQualifiedName(), JcrConstants.JCR_UUID)) {
353
354 }
355 else {
356 attribute.detach();
357 excluded = true;
358 }
359 }
360 else if (StringUtils.equals(attribute.getQualifiedName(), PRIMARYTYPE_PROPERTY)) {
361 String namespacePrefix = StringUtils.substringBefore(attribute.getValue(), ":");
362 collectNamespacePrefix(namespacePrefixesActuallyUsed, namespacePrefix);
363 }
364 else if (StringUtils.equals(attribute.getQualifiedName(), MIXINS_PROPERTY)) {
365 String filteredValue = filterMixinsPropertyValue(attribute.getValue(), namespacePrefixesActuallyUsed);
366 if (StringUtils.isBlank(filteredValue)) {
367 attribute.detach();
368 }
369 else {
370 attribute.setValue(filteredValue);
371 }
372 }
373 else if (StringUtils.startsWith(attribute.getValue(), "{Name}")) {
374 collectNamespacePrefixNameArray(namespacePrefixesActuallyUsed, attribute.getValue());
375
376 attribute.setValue(sortReferenceValues(attribute.getValue(), PropertyType.NAME));
377 }
378 else if (StringUtils.startsWith(attribute.getValue(), "{WeakReference}")) {
379
380 attribute.setValue(sortReferenceValues(attribute.getValue(), PropertyType.WEAKREFERENCE));
381 }
382 if (!excluded) {
383 collectNamespacePrefix(namespacePrefixesActuallyUsed, attribute.getNamespacePrefix());
384 }
385 }
386
387
388 if (setReplicationAttributes && matches(path, markReplicationActivatedIncludeNodes, true)) {
389 addMixin(element, "cq:ReplicationStatus");
390 element.setAttribute("lastReplicated", "{Date}" + dateLastReplicated, CQ_NAMESPACE);
391 element.setAttribute("lastReplicationAction", "Activate", CQ_NAMESPACE);
392 collectNamespacePrefix(namespacePrefixesActuallyUsed, CQ_NAMESPACE.getPrefix());
393 }
394
395
396 if (isReplicationElement && element.getChild("content", JCR_NAMESPACE) == null
397 && matches(path + "/jcr:content", markReplicationActivatedIncludeNodes, true)) {
398 Element contentNode = new Element("content", JCR_NAMESPACE);
399 String jcrContentPrimaryType = StringUtils.equals(jcrPrimaryType, "cq:Template") ? "cq:PageContent" : jcrPrimaryType + "Content";
400 contentNode.setAttribute("primaryType", jcrContentPrimaryType, JCR_NAMESPACE);
401 element.addContent(contentNode);
402 }
403
404 List<Element> children = new ArrayList<>(element.getChildren());
405 for (Element child : children) {
406 applyXmlExcludes(child, path, namespacePrefixesActuallyUsed, (insideReplicationElement || isReplicationElement) && !isContent);
407 }
408 }
409
410 private String filterMixinsPropertyValue(String value, Set<String> namespacePrefixesActuallyUsed) {
411 if (this.excludeMixins.length == 0 || StringUtils.isBlank(value)) {
412 return value;
413 }
414
415 List<String> mixins = new ArrayList<>();
416 for (String mixin : DocViewUtil.parseValues(value)) {
417 if (!matches(mixin, this.excludeMixins, false)) {
418 String namespacePrefix = StringUtils.substringBefore(mixin, ":");
419 collectNamespacePrefix(namespacePrefixesActuallyUsed, namespacePrefix);
420 mixins.add(mixin);
421 }
422 }
423
424 if (mixins.isEmpty()) {
425 return null;
426 }
427
428 return DocViewUtil.formatValues(mixins);
429 }
430
431 private void addMixin(Element element, String mixin) {
432 String mixinsString = element.getAttributeValue("mixinTypes", JCR_NAMESPACE);
433
434 List<String> mixins = new ArrayList<>();
435 if (!StringUtils.isBlank(mixinsString)) {
436 for (String item : DocViewUtil.parseValues(mixinsString)) {
437 mixins.add(item);
438 }
439 }
440 if (!mixins.contains(mixin)) {
441 mixins.add(mixin);
442 }
443
444 element.setAttribute("mixinTypes", DocViewUtil.formatValues(mixins), JCR_NAMESPACE);
445 }
446
447 private void collectNamespacePrefix(Set<String> prefixes, String prefix) {
448 if (StringUtils.isNotBlank(prefix)) {
449 prefixes.add(prefix);
450 }
451 }
452
453 private void collectNamespacePrefixNameArray(Set<String> prefixes, String value) {
454 for (String item : DocViewUtil.parseValues(value)) {
455 String namespacePrefix = StringUtils.substringBefore(item, ":");
456 collectNamespacePrefix(prefixes, namespacePrefix);
457 }
458 }
459
460
461
462
463
464
465
466 private String sortReferenceValues(String value, int propertyType) {
467 Set<String> refs = new TreeSet<>();
468 for (String item : DocViewUtil.parseValues(value)) {
469 refs.add(item);
470 }
471 return DocViewUtil.formatValues(new ArrayList<>(refs), propertyType);
472 }
473
474 }