|
|
The example below parses a PowerPoint file, transforms it to PowerML, and finally uses XPath to extract the images contained in the presentation. The images are stored in a given directory.
Make sure the JAR files from the PowerML core ZIP archive are on your classpath, together with the JAR from the latest Jaxen distribution (available for download from the Jaxen project website).
This example is also available for download. You may also be interested in our image extraction online demo.
import com.powerml.*;
import org.dom4j.*;
import sun.misc.BASE64Decoder;
import java.io.*;
import java.util.*;
/**
 * Shows how PowerML core can be used to extract the pictures contained in a
 * PowerPoint presentation: the file is parsed into a dom4j document, the
 * picture elements are selected via XPath, and each picture is written to a
 * file in the output directory.
 */
public class ImageExtractor {

    /**
     * Program entry point.
     *
     * @param args <code>args[0]</code> is the PowerPoint file path;
     *             <code>args[1]</code> (optional) is the image output
     *             directory, defaulting to the current directory
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            // The usage line must name this class, not "ImageExporter".
            System.out.println("usage: java ImageExtractor pptFile [outputDir]");
            return;
        }
        String fileName = args[0];
        String outputDir = (args.length > 1) ? args[1] : ".";

        Document document = parseFile(fileName);
        if (document == null) {
            return; // an error occurred and has already been reported
        }

        PPTImage[] images = null;
        try {
            images = getImages(document);
        } catch (IOException e) {
            System.out.println("An I/O error occured while parsing images!");
            e.printStackTrace();
            return;
        }

        try {
            writeImages(images, outputDir);
        } catch (IOException e) {
            System.out.println("An I/O error occured while writing images!");
            e.printStackTrace();
        }
    }

    /**
     * Collects all pictures found under
     * <code>/presentation/pictures/picture</code> in the given document.
     * Note that this registers the namespace prefix <code>p</code> on the
     * document's root element as a side effect.
     *
     * @param document the parsed presentation
     * @return one {@link PPTImage} per picture element, in selection order
     * @throws IOException if decoding the base64 picture data fails
     */
    private static PPTImage[] getImages(Document document)
            throws IOException {
        // XPath ignores the default namespace, so we have to define a prefix
        document.getRootElement().add(
                DocumentHelper.createNamespace("p", "http://www.powerml.com"));

        // get all picture elements
        List imageElements = document.selectNodes(
                "/p:presentation/p:pictures/p:picture");
        PPTImage[] images = new PPTImage[imageElements.size()];
        int index = 0;

        // iterate over all image elements, decode and store images in array
        for (Iterator iterator = imageElements.iterator(); iterator.hasNext();) {
            Element imageElement = (Element) iterator.next();
            // read format attribute
            String imageFormat = imageElement.valueOf("@format");
            // decode base64-encoded image data
            // NOTE(review): sun.misc.BASE64Decoder is an unsupported
            // JDK-internal class that is no longer available in JDK 9 and
            // later; java.util.Base64 (Java 8+) is the supported replacement.
            // Switching also requires dropping the sun.misc import at the top
            // of the file.
            byte[] imageData = new BASE64Decoder().decodeBuffer(
                    imageElement.getText());
            images[index++] = new PPTImage(imageFormat, imageData);
        }
        return images;
    }

    /**
     * Parses the given PowerPoint file into a dom4j document. Errors are
     * reported on standard output.
     *
     * @param fileName path of the PowerPoint file
     * @return the parsed document, or <code>null</code> if the file could not
     *         be found, read or recognized
     */
    private static Document parseFile(String fileName) {
        FileInputStream input = null;
        try {
            input = new FileInputStream(fileName);
            PresentationParser parser = new PresentationParser(input);
            return parser.toDom4JDocument();
        } catch (FileNotFoundException e) {
            System.out.println("File " + fileName + " not found!");
        } catch (IOException e) {
            System.out.println(
                    "An I/O error occured while parsing PowerPoint file " + fileName);
            e.printStackTrace();
        } catch (InvalidFormatException e) {
            System.out.println(
                    "File " + fileName + " is not a valid PowerPoint (97 or later) file!");
        } finally {
            // Release the file handle on every path; previously the stream
            // was never closed.
            // NOTE(review): assumes the returned Document no longer reads
            // from the stream once toDom4JDocument() has returned - confirm.
            if (input != null) {
                try {
                    input.close();
                } catch (IOException ignored) {
                    // A failure to close a stream we only read from cannot
                    // affect the result, so it is deliberately ignored.
                }
            }
        }
        return null;
    }

    /**
     * Writes each image to <code>outputDir</code> as
     * <code>&lt;index&gt;.&lt;format&gt;</code> and prints the number of
     * exported images.
     *
     * @param images    the images to write
     * @param outputDir directory the image files are created in
     * @throws IOException if a file cannot be created or written
     */
    private static void writeImages(PPTImage[] images, String outputDir)
            throws IOException {
        for (int i = 0; i < images.length; i++) {
            PPTImage image = images[i];
            String fileName = outputDir + File.separatorChar + i + "."
                    + image.getFormat();
            BufferedOutputStream stream = new BufferedOutputStream(
                    new FileOutputStream(fileName));
            try {
                stream.write(image.getData());
            } finally {
                // Close even when write() fails so the file handle is not
                // leaked; close() also flushes the buffer, so its own
                // IOException must propagate to the caller.
                stream.close();
            }
        }
        System.out.println(images.length + " images exported!");
    }

    /**
     * Wrapper class for images: pairs the picture format with the raw
     * picture bytes.
     */
    private static class PPTImage {
        private final String format;
        private final byte[] data;

        /**
         * Creates a new PPTImage instance.
         *
         * @param format picture format (jpg, png, emf, wmf, pict or dib)
         * @param data picture data as byte array
         */
        public PPTImage(String format, byte[] data) {
            this.format = format;
            this.data = data;
        }

        /**
         * Returns the image data.
         *
         * @return picture data as byte array (the stored array itself, not a
         *         copy)
         */
        public byte[] getData() {
            return data;
        }

        /**
         * Returns the picture format (jpg, png, emf, wmf, pict or dib).
         *
         * @return picture type
         */
        public String getFormat() {
            return format;
        }
    }

    /*
     * (c) 2005 Ralf Terdic (http://www.powerml.com).
     *
     * Permission is hereby granted, free of charge, to any person obtaining a copy
     * of this file to use or distribute it without restriction, as long as the above
     * copyright notice is retained and changes are marked as such.
     *
     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     * SOFTWARE.
     */
}