diff --git a/Ghidra/Features/Base/src/main/java/ghidra/formats/gfilesystem/FileSystemService.java b/Ghidra/Features/Base/src/main/java/ghidra/formats/gfilesystem/FileSystemService.java index 00dfe29090..8e217d6a36 100644 --- a/Ghidra/Features/Base/src/main/java/ghidra/formats/gfilesystem/FileSystemService.java +++ b/Ghidra/Features/Base/src/main/java/ghidra/formats/gfilesystem/FileSystemService.java @@ -15,10 +15,7 @@ */ package ghidra.formats.gfilesystem; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; +import java.io.*; import java.util.List; import org.apache.commons.io.FilenameUtils; @@ -451,15 +448,16 @@ public class FileSystemService { * lambda will be called and it will be responsible for returning an {@link InputStream} * which has the derived contents, which will be added to the file cache for next time. *
- * @param fsrl {@link FSRL} of the source file that this derived file is based on. - * @param derivedName a unique string identifying the derived file. + * @param fsrl {@link FSRL} of the source (or container) file that this derived file is based on + * @param derivedName a unique string identifying the derived file inside the source (or container) file * @param producer a {@link DerivedFileProducer callback or lambda} that returns an * {@link InputStream} that will be streamed into a file and placed into the file cache. + * Example:
(file) -> { return new XYZDecryptorInputStream(file); }
* @param monitor {@link TaskMonitor} that will be monitor for cancel requests and updated
- * with file io progress.
- * @return {@link FileCacheEntry} with file and md5 fields.
- * @throws CancelledException if the user cancels.
- * @throws IOException if there was an io error.
+ * with file io progress
+ * @return {@link FileCacheEntry} with file and md5 fields
+ * @throws CancelledException if the user cancels
+ * @throws IOException if there was an io error
*/
public FileCacheEntry getDerivedFile(FSRL fsrl, String derivedName,
DerivedFileProducer producer, TaskMonitor monitor)
@@ -470,19 +468,16 @@ public class FileSystemService {
// case should be okay as the only bad result will be extra
// work being performed recreating the contents of the same derived file a second
// time.
- FileCacheEntry srcCFI = getCacheFile(fsrl, monitor);
- String derivedMD5 = fileCacheNameIndex.get(srcCFI.md5, derivedName);
+ FileCacheEntry cacheEntry = getCacheFile(fsrl, monitor);
+ String derivedMD5 = fileCacheNameIndex.get(cacheEntry.md5, derivedName);
FileCacheEntry derivedFile = (derivedMD5 != null) ? fileCache.getFile(derivedMD5) : null;
if (derivedFile == null) {
monitor.setMessage(derivedName + " " + fsrl.getName());
- try (InputStream is = producer.produceDerivedStream(srcCFI.file)) {
+ try (InputStream is = producer.produceDerivedStream(cacheEntry.file)) {
derivedFile = fileCache.addStream(is, monitor);
- fileCacheNameIndex.add(srcCFI.md5, derivedName, derivedFile.md5);
+ fileCacheNameIndex.add(cacheEntry.md5, derivedName, derivedFile.md5);
}
}
- else {
- Msg.info(null, "Found derived file in cache: " + fsrl + ", " + derivedName);
- }
return derivedFile;
}
@@ -495,15 +490,15 @@ public class FileSystemService {
* lambda will be called and it will be responsible for producing and writing the derived
* file's bytes to a {@link OutputStream}, which will be added to the file cache for next time.
* - * @param fsrl {@link FSRL} of the source file that this derived file is based on. - * @param derivedName a unique string identifying the derived file. + * @param fsrl {@link FSRL} of the source (or container) file that this derived file is based on + * @param derivedName a unique string identifying the derived file inside the source (or container) file * @param pusher a {@link DerivedFilePushProducer callback or lambda} that recieves a {@link OutputStream}. * Example:
(os) -> { ...write to outputstream os here...; }
* @param monitor {@link TaskMonitor} that will be monitor for cancel requests and updated
- * with file io progress.
- * @return {@link FileCacheEntry} with file and md5 fields.
- * @throws CancelledException if the user cancels.
- * @throws IOException if there was an io error.
+ * with file io progress
+ * @return {@link FileCacheEntry} with file and md5 fields
+ * @throws CancelledException if the user cancels
+ * @throws IOException if there was an io error
*/
public FileCacheEntry getDerivedFilePush(FSRL fsrl, String derivedName,
DerivedFilePushProducer pusher, TaskMonitor monitor)
@@ -514,20 +509,34 @@ public class FileSystemService {
// case should be okay as the only bad result will be extra
// work being performed recreating the contents of the same derived file a second
// time.
- FileCacheEntry srcCFI = getCacheFile(fsrl, monitor);
- String derivedMD5 = fileCacheNameIndex.get(srcCFI.md5, derivedName);
+ FileCacheEntry cacheEntry = getCacheFile(fsrl, monitor);
+ String derivedMD5 = fileCacheNameIndex.get(cacheEntry.md5, derivedName);
FileCacheEntry derivedFile = (derivedMD5 != null) ? fileCache.getFile(derivedMD5) : null;
if (derivedFile == null) {
monitor.setMessage("Caching " + fsrl.getName() + " " + derivedName);
derivedFile = fileCache.pushStream(pusher, monitor);
- fileCacheNameIndex.add(srcCFI.md5, derivedName, derivedFile.md5);
- }
- else {
- Msg.info(null, "Found derived file in cache: " + fsrl + ", " + derivedName);
+ fileCacheNameIndex.add(cacheEntry.md5, derivedName, derivedFile.md5);
}
return derivedFile;
}
+ /**
+ * Checks whether a derived file has already been recorded for the given container.
+ *
+ * The container is resolved to its cache entry, and the derived-file name index is
+ * then queried with the container's md5 and the derived name.
+ *
+ * @param fsrl {@link FSRL} of the container file
+ * @param derivedName name that identifies the derived file inside of the container
+ * @param monitor {@link TaskMonitor} handed to the container cache lookup
+ * @return boolean true if the name index holds an md5 for the derived file at the time
+ * of the query, false otherwise
+ * @throws CancelledException if the user cancels
+ * @throws IOException if there was an io error
+ */
+ public boolean hasDerivedFile(FSRL fsrl, String derivedName, TaskMonitor monitor)
+ throws CancelledException, IOException {
+ return fileCacheNameIndex.get(getCacheFile(fsrl, monitor).md5, derivedName) != null;
+ }
+
/**
* Returns true if the container file probably holds one of the currently supported
* filesystem types.
diff --git a/Ghidra/Features/FileFormats/src/main/java/ghidra/file/formats/sevenzip/SevenZipFileSystem.java b/Ghidra/Features/FileFormats/src/main/java/ghidra/file/formats/sevenzip/SevenZipFileSystem.java
index 4fd784b11e..05e1bcb43e 100644
--- a/Ghidra/Features/FileFormats/src/main/java/ghidra/file/formats/sevenzip/SevenZipFileSystem.java
+++ b/Ghidra/Features/FileFormats/src/main/java/ghidra/file/formats/sevenzip/SevenZipFileSystem.java
@@ -18,197 +18,165 @@ package ghidra.file.formats.sevenzip;
import java.io.*;
import java.util.*;
-import ghidra.app.util.bin.ByteProvider;
-import ghidra.app.util.recognizer.*;
import ghidra.formats.gfilesystem.*;
import ghidra.formats.gfilesystem.annotations.FileSystemInfo;
-import ghidra.formats.gfilesystem.factory.GFileSystemBaseFactory;
import ghidra.util.Msg;
+import ghidra.util.NumericUtilities;
import ghidra.util.exception.CancelledException;
-import ghidra.util.exception.CryptoException;
import ghidra.util.task.TaskMonitor;
import net.sf.sevenzipjbinding.*;
import net.sf.sevenzipjbinding.impl.RandomAccessFileInStream;
import net.sf.sevenzipjbinding.simple.ISimpleInArchive;
import net.sf.sevenzipjbinding.simple.ISimpleInArchiveItem;
+import utilities.util.FileUtilities;
-@FileSystemInfo(type = "7zip", description = "7Zip", factory = GFileSystemBaseFactory.class)
-public class SevenZipFileSystem extends GFileSystemBase {
+@FileSystemInfo(type = "7zip", description = "7Zip", factory = SevenZipFileSystemFactory.class)
+public class SevenZipFileSystem implements GFileSystem {
- private static final Recognizer[] RECOGNIZERS =
- new Recognizer[] { new SevenZipRecognizer(), new XZRecognizer(), new Bzip2Recognizer(),
- //new GzipRecognizer(),
- //new TarRecognizer(),
- //new PkzipRecognizer(),
- new MSWIMRecognizer(), new ArjRecognizer(), new CabarcRecognizer(), new CHMRecognizer(),
- //new CpioRecognizer(),
- new CramFSRecognizer(), new DebRecognizer(),
- //new DmgRecognizer(),
- //new ISO9660Recognizer(),
- new LhaRecognizer(), new RarRecognizer(), new RPMRecognizer(), new VHDRecognizer(),
- new XarRecognizer(), new UnixCompressRecognizer() };
+ private FileSystemService fileSystemService;
+ private FileSystemIndexHelper+ * For each file in the archive, SZ will call this class's 1) getStream(), 2) prepare(), + * 3) lots of write()s, and then 4) setOperationResult(). + *
+ * This class writes the extracted bytes to a temp file, and then pushes that temp file + * into the FileSystem cache, and then deletes that temp file. + *
+ * Without this bulk extract method, SevenZip takes ~500ms per file when used via the singleton
+ * extract method.
+ */
+ private class SZExtractCallback implements IArchiveExtractCallback, ISequentialOutStream {
+
+ // monitor handed to the FileSystemService calls and to the temp file copy
+ private TaskMonitor monitor;
+ // archive index recorded by getStream() for the item being extracted
+ private int currentIndex;
+ // temp file created in prepareOperation(); reset to null in setOperationResult()
+ private File currentTempFile;
+ // stream that write() sends bytes to; reset to null in setOperationResult()
+ private OutputStream currentTempFileOutputStream;
+
+ /**
+ * Creates a callback that keeps the given monitor for use by its callback methods.
+ *
+ * @param monitor {@link TaskMonitor} stored in this callback
+ */
+ public SZExtractCallback(TaskMonitor monitor) {
+ this.monitor = monitor;
}
@Override
- public int write(byte[] buffer) throws SevenZipException {
+ public ISequentialOutStream getStream(int index, ExtractAskMode extractAskMode)
+ throws SevenZipException {
+ // STEP 1: SevenZip calls this method to get an object it can use to write the bytes to.
+ // If we return null, SZ treats it as a skip.
+ try {
+ if (fileSystemService.hasDerivedFile(fsrl.getContainer(), Integer.toString(index),
+ monitor)) {
+ // already recorded in the file cache, no need to extract it again
+ return null;
+ }
+ }
+ catch (CancelledException e) {
+ // abort the bulk extract instead of silently skipping every remaining item
+ throw new SevenZipException(e);
+ }
+ catch (IOException e) {
+ // best effort: skip this item, but leave a trace of why it was skipped
+ Msg.warn(this, "Unable to query file cache for item " + index + " of " + fsrl, e);
+ return null;
+ }
+ // remember which item the following write() calls belong to
+ this.currentIndex = index;
+ return this;
+ }
+
+ @Override
+ public void prepareOperation(ExtractAskMode extractAskMode) throws SevenZipException {
+ // STEP 2: SevenZip calls this method to further prepare to operate on the file.
+ // In our case, we only handle extract operations.
+ if (extractAskMode != ExtractAskMode.EXTRACT) {
+ return;
+ }
+ File tempFile = null;
+ try {
+ tempFile = File.createTempFile("ghidra_sevenzip_", ".tmp");
+ currentTempFileOutputStream = new FileOutputStream(tempFile);
+ // publish the temp file only once its stream is open, so a failure here
+ // does not leave currentTempFile pointing at a deleted file
+ currentTempFile = tempFile;
+ }
+ catch (IOException e) {
+ // don't leave an orphaned temp file behind if the stream could not be opened
+ if (tempFile != null) {
+ tempFile.delete();
+ }
+ throw new SevenZipException(e);
+ }
+ }
+
+ @Override
+ public int write(byte[] data) throws SevenZipException {
+ // STEP 3: SevenZip calls this multiple times for all the bytes in the file.
+ // We write each chunk to our temp file and report the whole chunk as consumed.
+ // NOTE(review): currentTempFileOutputStream is only opened by prepareOperation()
+ // in EXTRACT mode - confirm SevenZip never calls write() in any other mode.
+ try {
+ currentTempFileOutputStream.write(data);
+ return data.length;
}
catch (IOException e) {
throw new SevenZipException(e);
}
- return buffer.length;
}
+
+ @Override
+ public void setOperationResult(ExtractOperationResult extractOperationResult)
+ throws SevenZipException {
+ // STEP 4: SevenZip calls this to signal that the extract is done for this file.
+ if (currentTempFileOutputStream == null) {
+ // no temp file was opened for this item, nothing to push or clean up
+ return;
+ }
+ try {
+ currentTempFileOutputStream.close();
+ extractOperationResultToException(extractOperationResult);
+ // copy the finished temp file into the file cache under this item's index
+ fileSystemService.getDerivedFilePush(fsrl.getContainer(),
+ Integer.toString(currentIndex), (os) -> {
+ try (InputStream is = new FileInputStream(currentTempFile)) {
+ FileUtilities.copyStreamToStream(is, os, monitor);
+ }
+ }, monitor);
+ }
+ catch (IOException | CancelledException e) {
+ throw new SevenZipException(e);
+ }
+ finally {
+ // delete the temp file on every exit path, including failures of the close,
+ // the result check or the cache push
+ if (currentTempFile != null) {
+ currentTempFile.delete();
+ }
+ currentTempFile = null;
+ currentTempFileOutputStream = null;
+ }
+ }
+
+ // Progress notifications from SevenZip; both are deliberately left as no-ops.
+ //@formatter:off
+ @Override public void setTotal(long total) throws SevenZipException { /* nada */ }
+ @Override public void setCompleted(long complete) throws SevenZipException {/* nada */ }
+ //@formatter:on
+
}
+
}
diff --git a/Ghidra/Features/FileFormats/src/main/java/ghidra/file/formats/sevenzip/SevenZipFileSystemFactory.java b/Ghidra/Features/FileFormats/src/main/java/ghidra/file/formats/sevenzip/SevenZipFileSystemFactory.java
new file mode 100644
index 0000000000..1daab2d33e
--- /dev/null
+++ b/Ghidra/Features/FileFormats/src/main/java/ghidra/file/formats/sevenzip/SevenZipFileSystemFactory.java
@@ -0,0 +1,80 @@
+/* ###
+ * IP: GHIDRA
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ghidra.file.formats.sevenzip;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+import ghidra.app.util.recognizer.*;
+import ghidra.formats.gfilesystem.*;
+import ghidra.formats.gfilesystem.factory.GFileSystemFactoryWithFile;
+import ghidra.formats.gfilesystem.factory.GFileSystemProbeBytesOnly;
+import ghidra.util.exception.CancelledException;
+import ghidra.util.task.TaskMonitor;
+
+public class SevenZipFileSystemFactory
+ implements GFileSystemFactoryWithFile