diff --git a/engine/main.py b/engine/main.py index 4608b31..bf9c771 100644 --- a/engine/main.py +++ b/engine/main.py @@ -25,7 +25,8 @@ import logging; logger = logging.getLogger(__name__); logger.info("import") import configparser import pathlib import tarfile -from io import TextIOWrapper +#from io import TextIOWrapper +import mmap #Third Party from calfbox import cbox @@ -62,7 +63,7 @@ class Data(TemplateData): def _processAfterInit(self): session = self.parentSession #We just want to work conveniently in init with it by creating a local var. - + self.auditioner = None #set later. self.cachedSerializedDataForStartEngine = None def allInstr(self): @@ -84,9 +85,12 @@ class Data(TemplateData): basePath = pathlib.Path(baseSamplePath) if not basePath.exists(): - raise OSError(f"{basePath} does not exists to load samples from.") + logger.error(f"{basePath} does not exists to load samples from.") + #raise OSError() #no. this is actually fine with the user control over the download dialog in the gui + return #just do nothing if not basePath.is_dir(): - raise OSError(f"{basePath} is not a directory..") + logger.error(f"{basePath} is not a directory.") + return firstRun = not self.libraries @@ -318,13 +322,34 @@ class Library(object): needTarData = True if needTarData: + """We open the tar file without using the very slow extractfile method. Instead we + stream the beginning of the file, which we forced to be the ini during tar-creation.""" + + + startmarker = "[library]".encode() #includes the marker + endmarker = "[endoflibrary]".encode() #excludes the marker. Which is what we want! 
+ + with open (tarFilePath, "rb", 0) as f, mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as s: + start = s.find(startmarker) + end = s.find(endmarker) + f.seek(start, 0) #0 means from beginning of file + result = f.read(end-start) + + self.config = configparser.ConfigParser() + self.config.read_string(result.decode()) + assert "library" in self.config.sections(), self.config.sections() + assert not "endoflibrary" in self.config.sections(), self.config.sections() + #self.config is permant now. We can close the file object + + + """ #Old Code. with tarfile.open(name=tarFilePath, mode='r:') as opentarfile: #PermissionErrors are caught by the constructing line in main/Data above iniFileObject = TextIOWrapper(opentarfile.extractfile("library.ini")) self.config = configparser.ConfigParser() self.config.read_file(iniFileObject) #self.config is permant now. We can close the file object - """ + #Extract an image file. But only if it exists. tarfile.getmember is basically an exist-check that trows KeyError if not try: diff --git a/engine/resources/000 - Default.tar b/engine/resources/000 - Default.tar index fdcd7de..5ab9897 100644 Binary files a/engine/resources/000 - Default.tar and b/engine/resources/000 - Default.tar differ diff --git a/qtgui/chooseDownloadDirectory.py b/qtgui/chooseDownloadDirectory.py index eb92beb..723c717 100644 --- a/qtgui/chooseDownloadDirectory.py +++ b/qtgui/chooseDownloadDirectory.py @@ -25,29 +25,47 @@ import logging; logger = logging.getLogger(__name__); logger.info("import") #Standard Lib from pathlib import Path import os.path -from os import makedirs +import os +import json +from time import sleep +from shutil import disk_usage -#System Wide Modules +#Third Party, system wide Modules from PyQt5 import QtCore, QtWidgets, QtGui -#Template Moduiles -from .designer.chooseDownloadDirectory import Ui_ChooseDownloadDirectory -from .resources import * #has the translation +#Template Modules +from template.pySmartDL import SmartDL +from 
template.helper import humanReadableFilesize #Client Modules +from .designer.chooseDownloadDirectory import Ui_ChooseDownloadDirectory +from .resources import * #has the translation import engine.api as api from engine.config import * #imports METADATA from qtgui.resources import * #Has the logo + + class ChooseDownloadDirectory(QtWidgets.QDialog): - def __init__(self): + """This dialog must only be called when the program is already after the initial init state. + Especially on the very first run because we call api.rescanSampleDirectory on accept + + It gets constructed from init each time. No need to reset values. + """ + + def __init__(self, parentMainWindow, autoStartOnFirstRun=False): super().__init__() #no parent, this is the top level window at this time. self.setModal(True) #block until closed self.ui = Ui_ChooseDownloadDirectory() self.ui.setupUi(self) + self.parentMainWindow = parentMainWindow + self.autoStartOnFirstRun = autoStartOnFirstRun + self.currentSmartDL = None #will be a SmartDL object when a download is in progress + self._abortDownloadNOW = False #if set to True during download it will stop the process + settings = QtCore.QSettings("LaborejoSoftwareSuite", METADATA["shortName"]) if settings.contains("sampleDownloadDirectory"): self.ui.pathComboBox.insertItem(0, settings.value("sampleDownloadDirectory", type=str)) @@ -57,9 +75,27 @@ class ChooseDownloadDirectory(QtWidgets.QDialog): self.ui.buttonBox.accepted.connect(self.accept) self.ui.buttonBox.rejected.connect(self.reject) + self.ui.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText(QtCore.QCoreApplication.translate("ChooseDownloadDirectory", "Rescan Sample Directory")) + self._cancelDefaultText = QtCore.QCoreApplication.translate("ChooseDownloadDirectory", "Don't rescan") + self.ui.buttonBox.button(QtWidgets.QDialogButtonBox.Cancel).setText(self._cancelDefaultText) + self.ui.openFileDialogButton.setText("") 
self.ui.openFileDialogButton.setIcon(self.style().standardIcon(getattr(QtWidgets.QStyle, "SP_DialogOpenButton"))) self.ui.openFileDialogButton.clicked.connect(self.requestPathFromDialog) + + self.ui.downloadPushButton.setEnabled(True) + self._downloadDefaultText = QtCore.QCoreApplication.translate("ChooseDownloadDirectory", "Download and Update Instrument Libraries") + self._pauseText = QtCore.QCoreApplication.translate("ChooseDownloadDirectory", "Pause Download") + self._resumeText = QtCore.QCoreApplication.translate("ChooseDownloadDirectory", "Resume Download") + self.ui.downloadPushButton.setText(self._downloadDefaultText) + self.ui.downloadPushButton.clicked.connect(self.startDownload) + + self.ui.progressLabel.setVisible(False) + self.ui.labelSpeed.setVisible(False) + self.ui.progressBar.setValue(0) + self.ui.progressBar.setEnabled(False) + self.ui.progressBar.setVisible(False) + self.exec() @@ -76,18 +112,217 @@ class ChooseDownloadDirectory(QtWidgets.QDialog): def accept(self): self.path = self.ui.pathComboBox.currentText() #easy abstraction so that the caller does not need to know our widget name settings = QtCore.QSettings("LaborejoSoftwareSuite", METADATA["shortName"]) - if not os.path.exists(self.path): - try: - makedirs(self.path) - except: - pass #file saving error logging is handled later - - #There is no guarantee that the dir really exists. but at this point the user is on its own. - #It is allowed to use /dev/null after all - settings.setValue("sampleDownloadDirectory", self.path) - api.rescanSampleDirectory(self.path) + + sampleDir = Path(self.path) + if sampleDir.exists() and sampleDir.is_dir() and os.access(self.path, os.R_OK): #readable? + logger.info(f"New sample dir path {self.path} accepted. 
Remembering for later.") + settings.setValue("sampleDownloadDirectory", self.path) + if not self.autoStartOnFirstRun: + api.rescanSampleDirectory(self.path) + else: + logger.info(f"Attempted to rescan sample dir with path {self.path} that does not exist or is not readable. Ignoring.") + super().accept() def reject(self): + #During Download the button says "Abort". We make sure all downloads are actually stopped and then exist as normal. + self._abortDownloadNOW = True #just to be safe + + if self.currentSmartDL: + self.currentSmartDL.unpause() #Just stopping here while paused will freeze Qt. With setting the abort switch above we can let it play out. + self.path = None super().reject() + + def closeEvent(self, event): + """Window manager close. + We tried to stop downloading here in the past, but + that was unreliable. We now intentionally prevent closing while the download is running. + User can always press the "Abort Download" button explictely. + """ + if self.currentSmartDL: + event.ignore() + else: + self._abortDownloadNOW = True #just to be safe + event.accept() + super().closeEvent(event) + + def startDownload(self): + """First we download the index file from our own server. + That contains a list of mirror servers and a list of libraries with versions and sha256sums + + http://itaybb.github.io/pySmartDL/examples.html#example-6-use-the-nonblocking-flag-and-get-information-during-the-download-process + """ + + #hier weiter machen. Während des Downloads muss der Download button ein pause/resume werden. 
+ #und abort tatsächlich zuverlässig den momentanen und alle zukünftigen downloads in der queue abbrechen + + def _resetDialog(message): + self.ui.pathComboBox.setEnabled(True) + + self.ui.downloadPushButton.setEnabled(True) + try: + self.ui.downloadPushButton.clicked.disconnect() + except TypeError: + pass #already disconnect + self.ui.downloadPushButton.clicked.connect(self.startDownload) + self.ui.downloadPushButton.setText(self._downloadDefaultText) + + self.ui.progressLabel.setVisible(True) + self.ui.progressLabel.setText(message) + + self.ui.buttonBox.setEnabled(True) + self.ui.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setEnabled(True) + self.ui.buttonBox.button(QtWidgets.QDialogButtonBox.Cancel).setText(self._cancelDefaultText) + + self.currentSmartDL = None #TODO: Make sure all downloads are stopped + + self.parentMainWindow.qtApp.processEvents() + + def _pauseUnpause(): + if not self.currentSmartDL: + raise RuntimeError("Reached the pause/unpause function without a running download. This should not have been possible and needs to be bug-fixed") + + st = self.currentSmartDL.get_status() + if st == "downloading": + self.currentSmartDL.pause() + self.ui.downloadPushButton.setText(self._resumeText) + elif st == "paused": + self.currentSmartDL.unpause() + self.ui.downloadPushButton.setText(self._pauseText) + else: + logger.warning(f"Reached download state {st} from pause/unpause the button. This was not intended but is not a problem either.") + self.parentMainWindow.qtApp.processEvents() + + if self.currentSmartDL: + return + + if not self.ui.pathComboBox.currentText(): + logger.warning("Tried to download without giving a directory. Please try again.") + _resetDialog(QtCore.QCoreApplication.translate("ChooseDownloadDirectory", "Warning: Tried to download without giving a directory. 
Please try again.")) + return + + if not Path(self.ui.pathComboBox.currentText()).exists(): + os.makedirs(self.ui.pathComboBox.currentText()) + + if not Path(self.ui.pathComboBox.currentText()).exists() or not Path(self.ui.pathComboBox.currentText()).is_dir() or not os.access(self.ui.pathComboBox.currentText(), os.W_OK): #writable? + logger.warning("Tried to download without giving an existing, writable directory. Please check your filesystem.") + _resetDialog(QtCore.QCoreApplication.translate("ChooseDownloadDirectory", "Warning: Tried to download without giving an existing, writable directory. Please check your filesystem.")) + return + + logger.info("Downloading index file to temporary directory.") + self.ui.progressLabel.setVisible(True) + self.ui.progressLabel.setText(QtCore.QCoreApplication.translate("ChooseDownloadDirectory", "Fetching instrument list from server laborejo.org")) + self.ui.downloadPushButton.setEnabled(False) + #self.ui.buttonBox.setEnabled(False) + self.ui.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setEnabled(False) + self.ui.buttonBox.button(QtWidgets.QDialogButtonBox.Cancel).setText(QtCore.QCoreApplication.translate("ChooseDownloadDirectory", "Abort Download")) + + self.ui.pathComboBox.setEnabled(False) + + self.parentMainWindow.qtApp.processEvents() + + indexUrl = "https://www.laborejo.org/downloads/tembro-instruments/downloadindex.json" + indexUrl = "http://0.0.0.0:8000/downloadindex.json" + + indexDL = SmartDL(indexUrl, progress_bar=False) # Because we didn't pass a destination path to the constructor, temporary path was chosen. + try: + indexDL.start() #Blocking. We wait for the file to finish. 
+ except Exception as e: + logger.error(e) + _resetDialog(QtCore.QCoreApplication.translate("ChooseDownloadDirectory", "Error: Unable to download file\n{}\nReason:\n{}".format(indexUrl, e))) + return + + if not Path(indexDL.get_dest()).exists(): #to be extra sure + logger.error(f"File {indexUrl} was downloaded, but not found on disk!") + _resetDialog(QtCore.QCoreApplication.translate("ChooseDownloadDirectory", "Error: File was downloaded, but not found on disk!\n{}".format(indexDL.get_dest()))) + return + + with open(indexDL.get_dest(), "r") as indexf: + indexDict = json.loads(indexf.read()) + + indexDict["mirrors"].append("http://0.0.0.0:8000/") #TODO: Development + + #Test if there is enough disk space + required = indexDict["filesize"] + freeSpace = disk_usage(self.ui.pathComboBox.currentText()).free + logger.info(f"Download requires {required} bytes ({humanReadableFilesize(required)}). Free: {freeSpace} ({humanReadableFilesize(freeSpace)}) ") + if required >= freeSpace: + logger.error(f"Download requires {required} bytes ({humanReadableFilesize(required)}). You have free: {freeSpace} ({humanReadableFilesize(freeSpace)})") + msg = QtCore.QCoreApplication.translate("ChooseDownloadDirectory", f"Download requires {humanReadableFilesize(required)}. You have only {humanReadableFilesize(freeSpace)} free.") + _resetDialog(msg) + return + + #We have the index. We have a download path. All tests ok. Start the actual download. 
+ logger.info(f"Downloading instrument libraries to {self.ui.pathComboBox.currentText()}") + + self.ui.progressBar.setVisible(True) + self.ui.progressBar.setValue(0) + self.ui.labelSpeed.setVisible(True) + self.ui.labelSpeed.setText("") + + self.ui.downloadPushButton.setEnabled(True) + self.ui.downloadPushButton.clicked.disconnect() + + totalDownloads = len(indexDict["libraries"]) + downloadCounter = 0 + + #Make sure all gui texts and elements are visible + self.parentMainWindow.qtApp.processEvents() + + for libId, entry in indexDict["libraries"].items(): + if self._abortDownloadNOW: + if self.currentSmartDL: + self.currentSmartDL.stop() + continue + + urlMirrorList = (mirror + entry["tar"] for mirror in indexDict["mirrors"]) + + logger.info(f"Downloading {entry['name']}") + + + obj = SmartDL(urlMirrorList, self.ui.pathComboBox.currentText(), progress_bar=False) + self.currentSmartDL = obj + #With Hash Verification it will not only test the download but also don't double-download an existing file. + obj.add_hash_verification("sha256", entry["sha256"]) + obj.start(blocking=False) + + #Set the progress label text. 
But keep it international + self.ui.progressLabel.setText(f"[{downloadCounter+1}/{totalDownloads}]: {entry['name']}") + + self.ui.downloadPushButton.setText(self._pauseText) + self.ui.downloadPushButton.clicked.connect(_pauseUnpause) + + self.parentMainWindow.qtApp.processEvents() + + while not obj.isFinished(): + #This loops also runs during download pause + if self._abortDownloadNOW: + obj.stop() + if self.currentSmartDL: + self.currentSmartDL.stop() + self.currentSmartDL = None + continue + + self.ui.labelSpeed.setText(f"{obj.get_speed(human=True)}") + self.ui.progressBar.setValue(int(obj.get_progress()*100)) + self.parentMainWindow.qtApp.processEvents() #Keep Qt responsive + sleep(0.01) + + if obj.isSuccessful(): #This is triggered at least when the file already exists in the right version + self.ui.labelSpeed.setText("") + self.ui.progressBar.setValue(100) + self.ui.downloadPushButton.clicked.disconnect() + self.parentMainWindow.qtApp.processEvents() #Keep Qt responsive + else: + for e in obj.get_errors(): + logger.error(f"{e}") + + downloadCounter += 1 + + #assert downloadCounter == totalDownloads not true in case of Abort. But was true long enough development that I confirmed everything works + + _resetDialog(f"[{downloadCounter}/{totalDownloads}]") + self.ui.downloadPushButton.setEnabled(False) + self.parentMainWindow.qtApp.processEvents() + logger.info("Download process finished or aborted") diff --git a/qtgui/designer/chooseDownloadDirectory.py b/qtgui/designer/chooseDownloadDirectory.py index e5c5fb3..c5e1669 100644 --- a/qtgui/designer/chooseDownloadDirectory.py +++ b/qtgui/designer/chooseDownloadDirectory.py @@ -2,7 +2,7 @@ # Form implementation generated from reading ui file 'chooseDownloadDirectory.ui' # -# Created by: PyQt5 UI code generator 5.15.4 +# Created by: PyQt5 UI code generator 5.15.6 # # WARNING: Any manual changes made to this file will be lost when pyuic5 is # run again. Do not edit this file unless you know what you are doing. 
@@ -14,7 +14,7 @@ from PyQt5 import QtCore, QtGui, QtWidgets class Ui_ChooseDownloadDirectory(object): def setupUi(self, ChooseDownloadDirectory): ChooseDownloadDirectory.setObjectName("ChooseDownloadDirectory") - ChooseDownloadDirectory.resize(410, 329) + ChooseDownloadDirectory.resize(410, 391) self.verticalLayout = QtWidgets.QVBoxLayout(ChooseDownloadDirectory) self.verticalLayout.setObjectName("verticalLayout") self.layoutWidget = QtWidgets.QWidget(ChooseDownloadDirectory) @@ -48,18 +48,36 @@ class Ui_ChooseDownloadDirectory(object): self.openFileDialogButton.setObjectName("openFileDialogButton") self.horizontalLayout_2.addWidget(self.openFileDialogButton) self.verticalLayout.addLayout(self.horizontalLayout_2) + spacerItem = QtWidgets.QSpacerItem(20, 30, QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed) + self.verticalLayout.addItem(spacerItem) + self.progressLabel = QtWidgets.QLabel(ChooseDownloadDirectory) + self.progressLabel.setAlignment(QtCore.Qt.AlignCenter) + self.progressLabel.setObjectName("progressLabel") + self.verticalLayout.addWidget(self.progressLabel) + self.widget = QtWidgets.QWidget(ChooseDownloadDirectory) + self.widget.setObjectName("widget") + self.horizontalLayout = QtWidgets.QHBoxLayout(self.widget) + self.horizontalLayout.setObjectName("horizontalLayout") + self.progressBar = QtWidgets.QProgressBar(self.widget) + self.progressBar.setProperty("value", 24) + self.progressBar.setTextVisible(True) + self.progressBar.setOrientation(QtCore.Qt.Horizontal) + self.progressBar.setInvertedAppearance(False) + self.progressBar.setFormat("%p%") + self.progressBar.setObjectName("progressBar") + self.horizontalLayout.addWidget(self.progressBar) + self.labelSpeed = QtWidgets.QLabel(self.widget) + self.labelSpeed.setObjectName("labelSpeed") + self.horizontalLayout.addWidget(self.labelSpeed) + self.verticalLayout.addWidget(self.widget) + self.downloadPushButton = QtWidgets.QPushButton(ChooseDownloadDirectory) + 
self.downloadPushButton.setObjectName("downloadPushButton") + self.verticalLayout.addWidget(self.downloadPushButton) self.buttonBox = QtWidgets.QDialogButtonBox(ChooseDownloadDirectory) self.buttonBox.setOrientation(QtCore.Qt.Horizontal) self.buttonBox.setStandardButtons(QtWidgets.QDialogButtonBox.Cancel|QtWidgets.QDialogButtonBox.Ok) self.buttonBox.setObjectName("buttonBox") self.verticalLayout.addWidget(self.buttonBox) - self.widget_2 = QtWidgets.QWidget(ChooseDownloadDirectory) - self.widget_2.setObjectName("widget_2") - self.horizontalLayout_4 = QtWidgets.QHBoxLayout(self.widget_2) - self.horizontalLayout_4.setContentsMargins(0, 0, 0, 0) - self.horizontalLayout_4.setSpacing(0) - self.horizontalLayout_4.setObjectName("horizontalLayout_4") - self.verticalLayout.addWidget(self.widget_2) self.retranslateUi(ChooseDownloadDirectory) QtCore.QMetaObject.connectSlotsByName(ChooseDownloadDirectory) @@ -67,5 +85,8 @@ class Ui_ChooseDownloadDirectory(object): def retranslateUi(self, ChooseDownloadDirectory): _translate = QtCore.QCoreApplication.translate ChooseDownloadDirectory.setWindowTitle(_translate("ChooseDownloadDirectory", "Choose Session Directory")) - self.label.setText(_translate("ChooseDownloadDirectory", "

Please choose a directory for your sample files. The location can be read-only and will be shared by all sessions.

At the moment you have to manually download the files and move them to this directory. An integrated downloader will be added to this program after its beta-phase.

Changing the directory requires a program restart. The sample libraries will only be scanned on program start as well.

The download URL is:

https://laborejo.org/downloads/tembro-instruments/

")) + self.label.setText(_translate("ChooseDownloadDirectory", "

Please choose a directory for your sample files. The location will be shared by all sessions.

The instrument sample files are around 10 Gigabytes in size.

You can either use this integrated downloader or get the files manually from: https://laborejo.org/downloads/tembro-instruments/

The downloader can be used to update instruments at any time and will not download the same instrument/version twice.

")) self.openFileDialogButton.setText(_translate("ChooseDownloadDirectory", "Choose Directory")) + self.progressLabel.setText(_translate("ChooseDownloadDirectory", "Library 4 of 10: Kazoo Ensemble")) + self.labelSpeed.setText(_translate("ChooseDownloadDirectory", "TextLabel")) + self.downloadPushButton.setText(_translate("ChooseDownloadDirectory", "Download")) diff --git a/qtgui/designer/chooseDownloadDirectory.ui b/qtgui/designer/chooseDownloadDirectory.ui index 08d642a..870f370 100644 --- a/qtgui/designer/chooseDownloadDirectory.ui +++ b/qtgui/designer/chooseDownloadDirectory.ui @@ -7,7 +7,7 @@ 0 0 410 - 329 + 391 @@ -23,7 +23,7 @@ - <html><head/><body><p>Please choose a directory for your sample files. The location can be read-only and will be shared by all sessions. </p><p>At the moment you have to manually download the files and move them to this directory. An integrated downloader will be added to this program after its beta-phase. </p><p>Changing the directory requires a program restart. The sample libraries will only be scanned on program start as well.</p><p>The download URL is:</p><p align="center"><a href="https://laborejo.org/downloads/tembro-instruments/"><span style=" text-decoration: underline; color:#00ffff;">https://laborejo.org/downloads/tembro-instruments/</span></a></p></body></html> + <html><head/><body><p>Please choose a directory for your sample files. The location will be shared by all sessions. 
</p><p>The instrument sample files are around 10 Gigabytes in size.</p><p>You can either use this integrated downloader or get the files manually from: <a href="https://laborejo.org/downloads/tembro-instruments/"><span style=" text-decoration: underline; color:#00ffff;">https://laborejo.org/downloads/tembro-instruments/ </span></a></p><p>The downloader can be used to update instruments at any time and will not download the same instrument/version twice.</p></body></html> true @@ -67,36 +67,80 @@ - + - Qt::Horizontal + Qt::Vertical - - QDialogButtonBox::Cancel|QDialogButtonBox::Ok + + QSizePolicy::Fixed + + + + 20 + 30 + + + + + + + + Library 4 of 10: Kazoo Ensemble + + + Qt::AlignCenter - - - - 0 - - - 0 - - - 0 - - - 0 - - - 0 - + + + + + + 24 + + + true + + + Qt::Horizontal + + + false + + + %p% + + + + + + + TextLabel + + + + + + + Download + + + + + + + Qt::Horizontal + + + QDialogButtonBox::Cancel|QDialogButtonBox::Ok + + + diff --git a/qtgui/mainwindow.py b/qtgui/mainwindow.py index 64ad7ea..e2cd14a 100644 --- a/qtgui/mainwindow.py +++ b/qtgui/mainwindow.py @@ -21,7 +21,8 @@ along with this program. If not, see . import logging; logging.info("import {}".format(__file__)) #Standard Library Modules - +import pathlib +import os #Third Party Modules from PyQt5 import QtWidgets, QtCore, QtGui @@ -87,22 +88,42 @@ class MainWindow(TemplateMainWindow): self.setupMenu() + + #Find out if we already have a global sample directory additionalData={} settings = QtCore.QSettings("LaborejoSoftwareSuite", METADATA["shortName"]) + if settings.contains("sampleDownloadDirectory"): + sampleDir = pathlib.Path(settings.value("sampleDownloadDirectory", type=str)) + if sampleDir.exists() and sampleDir.is_dir() and os.access(settings.value("sampleDownloadDirectory", type=str), os.R_OK): #readable? 
+ sampleDirOk = True + else: + sampleDirOk = False + else: + sampleDirOk = False + + if sampleDirOk: additionalData["baseSamplePath"] = settings.value("sampleDownloadDirectory", type=str) else: #first start. - dialog = ChooseDownloadDirectory() + dialog = ChooseDownloadDirectory(parentMainWindow=self, autoStartOnFirstRun=True) if dialog.path: - additionalData["baseSamplePath"] = dialog.path + #It is possible that a download has happened at this point and we have a valid sample dir + #Or we have a valid, but empty sample dir. + #But it is also possible that there is garbage in the directory input field and the user just clicked ok. + sampleDir = pathlib.Path(dialog.path) + if sampleDir.exists() and sampleDir.is_dir() and os.access(dialog.path, os.R_OK): #readable? + additionalData["baseSamplePath"] = dialog.path + else: + additionalData["baseSamplePath"] = "/tmp" else: - additionalData["baseSamplePath"] = "/tmp" #TODO: At least give a message. - - api.callbacks.rescanSampleDir.append(self.react_rescanSampleDir) #This only happens on actual, manually instructed rescanning through the api. We instruct this through our Rescan-Dialog. + additionalData["baseSamplePath"] = "/tmp" + print (additionalData) self.start(additionalData) #This shows the GUI, or not, depends on the NSM gui save setting. We need to call that after the menu, otherwise the about dialog will block and then we get new menu entries, which looks strange. + api.callbacks.rescanSampleDir.append(self.react_rescanSampleDir) #This only happens on actual, manually instructed rescanning through the api. We instruct this through our Rescan-Dialog. 
+ #Statusbar will show possible actions, such as "use scrollwheel to transpose" #self.statusBar().showMessage(QtCore.QCoreApplication.translate("Statusbar", "")) self.statusBar().showMessage("") @@ -113,7 +134,7 @@ class MainWindow(TemplateMainWindow): #New menu entries and template-menu overrides #self.menu.connectMenuEntry("actionAbout", lambda: print("About Dialog Menu deactivated")) #deactivates the original function #self.menu.addMenuEntry("menuEdit", "actionNils", "Nils", lambda: print("Merle")) - self.menu.addMenuEntry("menuEdit", "actionSampleDirPathDialog", "Sample Files Location", ChooseDownloadDirectory) + self.menu.addMenuEntry("menuEdit", "actionSampleDirPathDialog", "Sample Files Location", lambda: ChooseDownloadDirectory(parentMainWindow=self)) self.menu.addMenuEntry("menuEdit", "actionLoadSamples", QtCore.QCoreApplication.translate("Menu", "Load all Instrument Samples (slow!)"), api.loadAllInstrumentSamples) self.menu.addMenuEntry("menuEdit", "actionUnloadSamples", QtCore.QCoreApplication.translate("Menu", "Unload all Instrument Samples (also slow.)"), api.unloadAllInstrumentSamples) #self.menu.connectMenuEntry("actionNils", lambda: print("Override")) diff --git a/template/helper.py b/template/helper.py index 1e69f64..2fef53f 100644 --- a/template/helper.py +++ b/template/helper.py @@ -4,7 +4,7 @@ """ Copyright 2022, Nils Hilbricht, Germany ( https://www.hilbricht.net ) -Most of this file is trivial code and does not reach the "Schaffenshöhe" for Urheberrecht. +Most of this file is trivial code and does not reach the "Schaffenshöhe" for Urheberrecht. If it is: This is practically intended as my Public Domain software. 
@@ -74,7 +74,7 @@ def flatList(lst): yield elem def compress(input, inputLowest, inputHighest, outputLowest, outputHighest): - return (input-inputLowest) / (inputHighest-inputLowest) * (outputHighest-outputLowest) + outputLowest + return (input-inputLowest) / (inputHighest-inputLowest) * (outputHighest-outputLowest) + outputLowest def listToUniqueKeepOrder(seq): seen = set() @@ -102,3 +102,9 @@ def provokecrash(): p[0] = b'x' +def humanReadableFilesize(num, suffix="B"): + for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]: + if abs(num) < 1024.0: + return f"{num:3.1f}{unit}{suffix}" + num /= 1024.0 + return f"{num:.1f}Yi{suffix}" diff --git a/template/pySmartDL/LICENSE.txt b/template/pySmartDL/LICENSE.txt new file mode 100644 index 0000000..68a49da --- /dev/null +++ b/template/pySmartDL/LICENSE.txt @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to diff --git a/template/pySmartDL/README.md b/template/pySmartDL/README.md new file mode 100644 index 0000000..57d17e1 --- /dev/null +++ b/template/pySmartDL/README.md @@ -0,0 +1,59 @@ +Python Smart Download Manager -- pySmartDL +========================================== + +``pySmartDL`` strives to be a full-fledged smart download manager for Python. Main features: + +* Built-in download acceleration (with the `multipart downloading technique `_). +* Mirrors support. +* Pause/Unpause feature. +* Speed limiting feature. +* Hash checking. +* Non-blocking, shows progress bar, download speed and eta. +* Full support for custom headers and methods. +* Python 3 Support + +Project Links +============= + + * Downloads: http://pypi.python.org/pypi/pySmartDL/ + * Documentation: http://itaybb.github.io/pySmartDL/ + * Project page: https://github.com/iTaybb/pySmartDL/ + * Bugs and Issues: https://github.com/iTaybb/pySmartDL/issues + +Installation +============ + +**Using pip (recommended way)** + + Make sure python-pip is installed on you system. If you are using virtualenv, then pip is alredy installed into environments created by virtualenv. Run pip to install pySmartDL: + + ``pip install pySmartDL`` + +**From Source** + + The pySmartDL package is installed from source using distutils in the usual way. Download the `source distribution `_ first. 
Unpack the source zip and run the following to install the package site-wide: + + ``python setup.py install`` + +Usage +===== + +Download is as simple as creating an instance and starting it: + + from pySmartDL import SmartDL + + url = "https://github.com/iTaybb/pySmartDL/raw/master/test/7za920.zip" + dest = "C:\\Downloads\\" # or '~/Downloads/' on linux + + obj = SmartDL(url, dest) + obj.start() + # [*] 0.23 Mb / 0.37 Mb @ 88.00Kb/s [##########--------] [60%, 2s left] + + path = obj.get_dest() + +Requirements +============== + + * Python 3.4 or greater. + +Copyright (C) 2014-2020 Itay Brandes. \ No newline at end of file diff --git a/template/pySmartDL/__init__.py b/template/pySmartDL/__init__.py new file mode 100644 index 0000000..411194d --- /dev/null +++ b/template/pySmartDL/__init__.py @@ -0,0 +1,4 @@ +from .pySmartDL import SmartDL, HashFailedException, CanceledException +from . import utils + +__version__ = pySmartDL.__version__ \ No newline at end of file diff --git a/template/pySmartDL/control_thread.py b/template/pySmartDL/control_thread.py new file mode 100644 index 0000000..eb407ee --- /dev/null +++ b/template/pySmartDL/control_thread.py @@ -0,0 +1,119 @@ +import threading +import time + +from . import utils + +class ControlThread(threading.Thread): + "A class that shows information about a running SmartDL object." + def __init__(self, obj): + threading.Thread.__init__(self) + self.obj = obj + self.progress_bar = obj.progress_bar + self.logger = obj.logger + self.shared_var = obj.shared_var + + self.dl_speed = 0 + self.eta = 0 + self.lastBytesSamples = [] # list with last 50 Bytes Samples. 
+ self.last_calculated_totalBytes = 0 + self.calcETA_queue = [] + self.calcETA_i = 0 + self.calcETA_val = 0 + self.dl_time = -1.0 + + self.daemon = True + self.start() + + def run(self): + t1 = time.time() + self.logger.info("Control thread has been started.") + + while not self.obj.pool.done(): + self.dl_speed = self.calcDownloadSpeed(self.shared_var.value) + if self.dl_speed > 0: + self.eta = self.calcETA((self.obj.filesize-self.shared_var.value)/self.dl_speed) + + if self.progress_bar: + if self.obj.filesize: + status = r"[*] %s / %s @ %s/s %s [%3.1f%%, %s left] " % (utils.sizeof_human(self.shared_var.value), utils.sizeof_human(self.obj.filesize), utils.sizeof_human(self.dl_speed), utils.progress_bar(1.0*self.shared_var.value/self.obj.filesize), self.shared_var.value * 100.0 / self.obj.filesize, utils.time_human(self.eta, fmt_short=True)) + else: + status = r"[*] %s / ??? MB @ %s/s " % (utils.sizeof_human(self.shared_var.value), utils.sizeof_human(self.dl_speed)) + status = status + chr(8)*(len(status)+1) + print(status, end=' ', flush=True) + time.sleep(0.1) + + if self.obj._killed: + self.logger.info("File download process has been stopped.") + return + + if self.progress_bar: + if self.obj.filesize: + print(r"[*] %s / %s @ %s/s %s [100%%, 0s left] " % (utils.sizeof_human(self.obj.filesize), utils.sizeof_human(self.obj.filesize), utils.sizeof_human(self.dl_speed), utils.progress_bar(1.0))) + else: + print(r"[*] %s / %s @ %s/s " % (utils.sizeof_human(self.shared_var.value), utils.sizeof_human(self.shared_var.value), utils.sizeof_human(self.dl_speed))) + + t2 = time.time() + self.dl_time = float(t2-t1) + + while self.obj.post_threadpool_thread.is_alive(): + time.sleep(0.1) + + self.obj.pool.shutdown() + self.obj.status = "finished" + if not self.obj.errors: + self.logger.info("File downloaded within %.2f seconds." 
% self.dl_time) + + def get_eta(self): + if self.eta <= 0 or self.obj.status == 'paused': + return 0 + return self.eta + def get_speed(self): + if self.obj.status == 'paused': + return 0 + return self.dl_speed + def get_dl_size(self): + if self.shared_var.value > self.obj.filesize: + return self.obj.filesize + return self.shared_var.value + def get_final_filesize(self): + return self.obj.filesize + def get_progress(self): + if not self.obj.filesize: + return 0 + return 1.0*self.shared_var.value/self.obj.filesize + def get_dl_time(self): + return self.dl_time + + def calcDownloadSpeed(self, totalBytes, sampleCount=30, sampleDuration=0.1): + ''' + Function calculates the download rate. + @param totalBytes: The total amount of bytes. + @param sampleCount: How much samples should the function take into consideration. + @param sampleDuration: Duration of a sample in seconds. + ''' + l = self.lastBytesSamples + newBytes = totalBytes - self.last_calculated_totalBytes + self.last_calculated_totalBytes = totalBytes + if newBytes >= 0: # newBytes may be negetive, will happen + # if a thread has crushed and the totalBytes counter got decreased. + if len(l) == sampleCount: # calc download for last 3 seconds (30 * 100ms per signal emit) + l.pop(0) + + l.append(newBytes) + + dlRate = sum(l)/len(l)/sampleDuration + return dlRate + + def calcETA(self, eta): + self.calcETA_i += 1 + l = self.calcETA_queue + l.append(eta) + + if self.calcETA_i % 10 == 0: + self.calcETA_val = sum(l)/len(l) + if len(l) == 30: + l.pop(0) + + if self.calcETA_i < 50: + return 0 + return self.calcETA_val diff --git a/template/pySmartDL/download.py b/template/pySmartDL/download.py new file mode 100644 index 0000000..4f47517 --- /dev/null +++ b/template/pySmartDL/download.py @@ -0,0 +1,88 @@ +import os +import urllib.request, urllib.error, urllib.parse +import time +from . 
import utils + +def download(url, dest, requestArgs=None, context=None, startByte=0, endByte=None, timeout=4, shared_var=None, thread_shared_cmds=None, logger=None, retries=3): + "The basic download function that runs at each thread." + logger = logger or utils.DummyLogger() + req = urllib.request.Request(url, **requestArgs) + if endByte: + req.add_header('Range', 'bytes={:.0f}-{:.0f}'.format(startByte, endByte)) + logger.info("Downloading '{}' to '{}'...".format(url, dest)) + try: + # Context is used to skip ssl validation if verify is False. + urlObj = urllib.request.urlopen(req, timeout=timeout, context=context) + except urllib.error.HTTPError as e: + if e.code == 416: + ''' + HTTP 416 Error: Requested Range Not Satisfiable. Happens when we ask + for a range that is not available on the server. It will happen when + the server will try to send us a .html page that means something like + "you opened too many connections to our server". If this happens, we + will wait for the other threads to finish their connections and try again. + ''' + + if retries > 0: + logger.warning("Thread didn't got the file it was expecting. Retrying ({} times left)...".format(retries-1)) + time.sleep(5) + return download(url, dest, requestArgs, startByte, endByte, timeout, shared_var, thread_shared_cmds, logger, retries-1) + else: + raise + else: + raise + + with open(dest, 'wb') as f: + if endByte: + filesize = endByte-startByte + else: + try: + meta = urlObj.info() + filesize = int(urlObj.headers["Content-Length"]) + logger.info("Content-Length is {}.".format(filesize)) + except (IndexError, KeyError, TypeError): + logger.warning("Server did not send Content-Length. Filesize is unknown.") + + filesize_dl = 0 # total downloaded size + limitspeed_timestamp = time.time() + limitspeed_filesize = 0 + block_sz = 8192 + while True: + if thread_shared_cmds: + if 'stop' in thread_shared_cmds: + logger.info('stop command received. 
Stopping.') + raise CanceledException() + if 'pause' in thread_shared_cmds: + time.sleep(0.2) + continue + if 'limit' in thread_shared_cmds: + now = time.time() + time_passed = now - limitspeed_timestamp + if time_passed > 0.1: # we only observe the limit after 100ms + # if we passed the limit, we should + if (filesize_dl-limitspeed_filesize)/time_passed >= thread_shared_cmds['limit']: + time_to_sleep = (filesize_dl-limitspeed_filesize) / thread_shared_cmds['limit'] + logger.debug('Thread has downloaded {} in {}. Limit is {}/s. Slowing down...'.format(utils.sizeof_human(filesize_dl-limitspeed_filesize), utils.time_human(time_passed, fmt_short=True, show_ms=True), utils.sizeof_human(thread_shared_cmds['limit']))) + time.sleep(time_to_sleep) + continue + else: + limitspeed_timestamp = now + limitspeed_filesize = filesize_dl + + try: + buff = urlObj.read(block_sz) + except Exception as e: + logger.error(str(e)) + if shared_var: + shared_var.value -= filesize_dl + raise + + if not buff: + break + + filesize_dl += len(buff) + if shared_var: + shared_var.value += len(buff) + f.write(buff) + + urlObj.close() diff --git a/template/pySmartDL/pySmartDL.py b/template/pySmartDL/pySmartDL.py new file mode 100644 index 0000000..4fd5ebc --- /dev/null +++ b/template/pySmartDL/pySmartDL.py @@ -0,0 +1,675 @@ +import os +import sys +import urllib.request, urllib.error, urllib.parse +import copy +import threading +import time +import math +import tempfile +import base64 +import hashlib +import socket +import logging +from io import StringIO +import multiprocessing.dummy as multiprocessing +from ctypes import c_int +import json +import ssl + +from . 
import utils +from .control_thread import ControlThread +from .download import download + +__all__ = ['SmartDL', 'utils'] +__version_mjaor__ = 1 +__version_minor__ = 3 +__version_micro__ = 4 +__version__ = "{}.{}.{}".format(__version_mjaor__, __version_minor__, __version_micro__) + +class HashFailedException(Exception): + "Raised when hash check fails." + def __init__(self, fn, calc_hash, needed_hash): + self.filename = fn + self.calculated_hash = calc_hash + self.needed_hash = needed_hash + def __str__(self): + return 'HashFailedException({}, got {}, expected {})'.format(self.filename, self.calculated_hash, self.needed_hash) + def __repr__(self): + return ''.format(self.filename, self.calculated_hash, self.needed_hash) + +class CanceledException(Exception): + "Raised when the job is canceled." + def __init__(self): + pass + def __str__(self): + return 'CanceledException' + def __repr__(self): + return "" + +class SmartDL: + ''' + The main SmartDL class + + :param urls: Download url. It is possible to pass unsafe and unicode characters. You can also pass a list of urls, and those will be used as mirrors. + :type urls: string or list of strings + :param dest: Destination path. Default is `%TEMP%/pySmartDL/`. + :type dest: string + :param progress_bar: If True, prints a progress bar to the `stdout stream `_. Default is `True`. + :type progress_bar: bool + :param fix_urls: If true, attempts to fix urls with unsafe characters. + :type fix_urls: bool + :param threads: Number of threads to use. + :type threads: int + :param timeout: Timeout for network operations, in seconds. Default is 5. + :type timeout: int + :param logger: An optional logger. + :type logger: `logging.Logger` instance + :param connect_default_logger: If true, connects a default logger to the class. + :type connect_default_logger: bool + :param request_args: Arguments to be passed to a new urllib.request.Request instance in dictionary form. See `urllib.request docs `_ for options. 
+ :type request_args: dict + :rtype: `SmartDL` instance + :param verify: If ssl certificates should be validated. + :type verify: bool + + .. NOTE:: + The provided dest may be a folder or a full path name (including filename). The workflow is: + + * If the path exists, and it's an existing folder, the file will be downloaded to there with the original filename. + * If the past does not exist, it will create the folders, if needed, and refer to the last section of the path as the filename. + * If you want to download to folder that does not exist at the moment, and want the module to fill in the filename, make sure the path ends with `os.sep`. + * If no path is provided, `%TEMP%/pySmartDL/` will be used. + ''' + + def __init__(self, urls, dest=None, progress_bar=True, fix_urls=True, threads=5, timeout=5, logger=None, connect_default_logger=False, request_args=None, verify=True): + if logger: + self.logger = logger + elif connect_default_logger: + self.logger = utils.create_debugging_logger() + else: + self.logger = utils.DummyLogger() + if request_args: + if "headers" not in request_args: + request_args["headers"] = dict() + self.requestArgs = request_args + else: + self.requestArgs = {"headers": dict()} + if "User-Agent" not in self.requestArgs["headers"]: + self.requestArgs["headers"]["User-Agent"] = utils.get_random_useragent() + self.mirrors = [urls] if isinstance(urls, str) else urls + if fix_urls: + self.mirrors = [utils.url_fix(x) for x in self.mirrors] + self.url = self.mirrors.pop(0) + self.logger.info('Using url "{}"'.format(self.url)) + + fn = urllib.parse.unquote(os.path.basename(urllib.parse.urlparse(self.url).path)) + self.dest = dest or os.path.join(tempfile.gettempdir(), 'pySmartDL', fn) + if self.dest[-1] == os.sep: + if os.path.exists(self.dest[:-1]) and os.path.isfile(self.dest[:-1]): + os.unlink(self.dest[:-1]) + self.dest += fn + if os.path.isdir(self.dest): + self.dest = os.path.join(self.dest, fn) + + self.progress_bar = progress_bar + 
self.threads_count = threads + self.timeout = timeout + self.current_attemp = 1 + self.attemps_limit = 4 + self.minChunkFile = 1024**2*2 # 2MB + self.filesize = 0 + self.shared_var = multiprocessing.Value(c_int, 0) # a ctypes var that counts the bytes already downloaded + self.thread_shared_cmds = {} + self.status = "ready" + self.verify_hash = False + self._killed = False + self._failed = False + self._start_func_blocking = True + self.errors = [] + + self.post_threadpool_thread = None + self.control_thread = None + + if not os.path.exists(os.path.dirname(self.dest)): + self.logger.info('Folder "{}" does not exist. Creating...'.format(os.path.dirname(self.dest))) + os.makedirs(os.path.dirname(self.dest)) + if not utils.is_HTTPRange_supported(self.url, timeout=self.timeout): + self.logger.warning("Server does not support HTTPRange. threads_count is set to 1.") + self.threads_count = 1 + if os.path.exists(self.dest): + self.logger.warning('Destination "{}" already exists. Existing file will be removed.'.format(self.dest)) + if not os.path.exists(os.path.dirname(self.dest)): + self.logger.warning('Directory "{}" does not exist. Creating it...'.format(os.path.dirname(self.dest))) + os.makedirs(os.path.dirname(self.dest)) + + self.logger.info("Creating a ThreadPool of {} thread(s).".format(self.threads_count)) + self.pool = utils.ManagedThreadPoolExecutor(self.threads_count) + + if verify: + self.context = None + else: + self.context = ssl.create_default_context() + self.context.check_hostname = False + self.context.verify_mode = ssl.CERT_NONE + + def __str__(self): + return 'SmartDL(r"{}", dest=r"{}")'.format(self.url, self.dest) + + def __repr__(self): + return "".format(self.url) + + def add_basic_authentication(self, username, password): + ''' + Uses HTTP Basic Access authentication for the connection. + + :param username: Username. + :type username: string + :param password: Password. 
+ :type password: string + ''' + auth_string = '{}:{}'.format(username, password) + base64string = base64.standard_b64encode(auth_string.encode('utf-8')) + self.requestArgs['headers']['Authorization'] = b"Basic " + base64string + + def add_hash_verification(self, algorithm, hash): + ''' + Adds hash verification to the download. + + If hash is not correct, will try different mirrors. If all mirrors aren't + passing hash verification, `HashFailedException` Exception will be raised. + + .. NOTE:: + If downloaded file already exist on the destination, and hash matches, pySmartDL will not download it again. + + .. WARNING:: + The hashing algorithm must be supported on your system, as documented at `hashlib documentation page `_. + + :param algorithm: Hashing algorithm. + :type algorithm: string + :param hash: Hash code. + :type hash: string + ''' + + self.verify_hash = True + self.hash_algorithm = algorithm + self.hash_code = hash + + def fetch_hash_sums(self): + ''' + Will attempt to fetch UNIX hash sums files (`SHA256SUMS`, `SHA1SUMS` or `MD5SUMS` files in + the same url directory). + + Calls `self.add_hash_verification` if successful. Returns if a matching hash was found. 
+ + :rtype: bool + + *New in 1.2.1* + ''' + default_sums_filenames = ['SHA256SUMS', 'SHA1SUMS', 'MD5SUMS'] + folder = os.path.dirname(self.url) + orig_basename = os.path.basename(self.url) + + self.logger.info("Looking for SUMS files...") + for filename in default_sums_filenames: + try: + sums_url = "%s/%s" % (folder, filename) + sumsRequest = urllib.request.Request(sums_url, **self.requestArgs) + obj = urllib.request.urlopen(sumsRequest) + data = obj.read().split('\n') + obj.close() + + for line in data: + if orig_basename.lower() in line.lower(): + self.logger.info("Found a matching hash in %s" % sums_url) + algo = filename.rstrip('SUMS') + hash = line.split(' ')[0] + self.add_hash_verification(algo, hash) + return + + except urllib.error.HTTPError: + continue + + def start(self, blocking=None): + ''' + Starts the download task. Will raise `RuntimeError` if it's the object's already downloading. + + .. warning:: + If you're using the non-blocking mode, Exceptions won't be raised. In that case, call + `isSuccessful()` after the task is finished, to make sure the download succeeded. Call + `get_errors()` to get the the exceptions. + + :param blocking: If true, calling this function will block the thread until the download finished. Default is *True*. + :type blocking: bool + ''' + if not self.status == "ready": + raise RuntimeError("cannot start (current status is {})".format(self.status)) + self.logger.info('Starting a new SmartDL operation.') + + if blocking is None: + blocking = self._start_func_blocking + else: + self._start_func_blocking = blocking + + if self.mirrors: + self.logger.info('One URL and {} mirrors are loaded.'.format(len(self.mirrors))) + else: + self.logger.info('One URL is loaded.') + + if self.verify_hash and os.path.exists(self.dest): + if utils.get_file_hash(self.hash_algorithm, self.dest) == self.hash_code: + self.logger.info("Destination '%s' already exists, and the hash matches. No need to download." 
% self.dest) + self.status = 'finished' + return + + self.logger.info("Downloading '{}' to '{}'...".format(self.url, self.dest)) + req = urllib.request.Request(self.url, **self.requestArgs) + try: + urlObj = urllib.request.urlopen(req, timeout=self.timeout, context=self.context) + except (urllib.error.HTTPError, urllib.error.URLError, socket.timeout) as e: + self.errors.append(e) + if self.mirrors: + self.logger.info("{} Trying next mirror...".format(str(e))) + self.url = self.mirrors.pop(0) + self.logger.info('Using url "{}"'.format(self.url)) + self.start(blocking) + return + else: + self.logger.warning(str(e)) + self.errors.append(e) + self._failed = True + self.status = "finished" + raise + + try: + self.filesize = int(urlObj.headers["Content-Length"]) + self.logger.info("Content-Length is {} ({}).".format(self.filesize, utils.sizeof_human(self.filesize))) + except (IndexError, KeyError, TypeError): + self.logger.warning("Server did not send Content-Length. Filesize is unknown.") + self.filesize = 0 + + args = utils.calc_chunk_size(self.filesize, self.threads_count, self.minChunkFile) + bytes_per_thread = args[0][1] - args[0][0] + 1 + if len(args)>1: + self.logger.info("Launching {} threads (downloads {}/thread).".format(len(args), utils.sizeof_human(bytes_per_thread))) + else: + self.logger.info("Launching 1 thread (downloads {}).".format(utils.sizeof_human(bytes_per_thread))) + + self.status = "downloading" + + for i, arg in enumerate(args): + req = self.pool.submit( + download, + self.url, + self.dest+".%.3d" % i, + self.requestArgs, + self.context, + arg[0], + arg[1], + self.timeout, + self.shared_var, + self.thread_shared_cmds, + self.logger + ) + + self.post_threadpool_thread = threading.Thread( + target=post_threadpool_actions, + args=( + self.pool, + [[(self.dest+".%.3d" % i) for i in range(len(args))], self.dest], + self.filesize, + self + ) + ) + self.post_threadpool_thread.daemon = True + self.post_threadpool_thread.start() + + self.control_thread = 
ControlThread(self) + + if blocking: + self.wait(raise_exceptions=True) + + def _exc_callback(self, req, e): + self.errors.append(e[0]) + self.logger.exception(e[1]) + + def retry(self, eStr=""): + if self.current_attemp < self.attemps_limit: + self.current_attemp += 1 + self.status = "ready" + self.shared_var.value = 0 + self.thread_shared_cmds = {} + self.start() + + else: + s = 'The maximum retry attempts reached' + if eStr: + s += " ({})".format(eStr) + self.errors.append(urllib.error.HTTPError(self.url, "0", s, {}, StringIO())) + self._failed = True + + def try_next_mirror(self, e=None): + if self.mirrors: + if e: + self.errors.append(e) + self.status = "ready" + self.shared_var.value = 0 + self.url = self.mirrors.pop(0) + self.logger.info('Using url "{}"'.format(self.url)) + self.start() + else: + self._failed = True + self.errors.append(e) + + def get_eta(self, human=False): + ''' + Get estimated time of download completion, in seconds. Returns `0` if there is + no enough data to calculate the estimated time (this will happen on the approx. + first 5 seconds of each download). + + :param human: If true, returns a human-readable formatted string. Else, returns an int type number + :type human: bool + :rtype: int/string + ''' + if human: + s = utils.time_human(self.control_thread.get_eta()) + return s if s else "TBD" + return self.control_thread.get_eta() + + def get_speed(self, human=False): + ''' + Get current transfer speed in bytes per second. + + :param human: If true, returns a human-readable formatted string. Else, returns an int type number + :type human: bool + :rtype: int/string + ''' + if human: + return "{}/s".format(utils.sizeof_human(self.control_thread.get_speed())) + return self.control_thread.get_speed() + + def get_progress(self): + ''' + Returns the current progress of the download, as a float between `0` and `1`. 
+ + :rtype: float + ''' + if not self.filesize: + return 0 + if self.control_thread.get_dl_size() <= self.filesize: + return 1.0*self.control_thread.get_dl_size()/self.filesize + return 1.0 + + def get_progress_bar(self, length=20): + ''' + Returns the current progress of the download as a string containing a progress bar. + + .. NOTE:: + That's an alias for pySmartDL.utils.progress_bar(obj.get_progress()). + + :param length: The length of the progress bar in chars. Default is 20. + :type length: int + :rtype: string + ''' + return utils.progress_bar(self.get_progress(), length) + + def isFinished(self): + ''' + Returns if the task is finished. + + :rtype: bool + ''' + if self.status == "ready": + return False + if self.status == "finished": + return True + return not self.post_threadpool_thread.is_alive() + + def isSuccessful(self): + ''' + Returns if the download is successfull. It may fail in the following scenarios: + + - Hash check is enabled and fails. + - All mirrors are down. + - Any local I/O problems (such as `no disk space available`). + + .. NOTE:: + Call `get_errors()` to get the exceptions, if any. + + Will raise `RuntimeError` if it's called when the download task is not finished yet. + + :rtype: bool + ''' + + if self._killed: + return False + + n = 0 + while self.status != 'finished': + n += 1 + time.sleep(0.1) + if n >= 15: + raise RuntimeError("The download task must be finished in order to see if it's successful. (current status is {})".format(self.status)) + + return not self._failed + + def get_errors(self): + ''' + Get errors happened while downloading. + + :rtype: list of `Exception` instances + ''' + return self.errors + + def get_status(self): + ''' + Returns the current status of the task. Possible values: *ready*, + *downloading*, *paused*, *combining*, *finished*. + + :rtype: string + ''' + return self.status + + def wait(self, raise_exceptions=False): + ''' + Blocks until the download is finished. 
+ + :param raise_exceptions: If true, this function will raise exceptions. Default is *False*. + :type raise_exceptions: bool + ''' + if self.status in ["ready", "finished"]: + return + + while not self.isFinished(): + time.sleep(0.1) + self.post_threadpool_thread.join() + self.control_thread.join() + + if self._failed and raise_exceptions: + raise self.errors[-1] + + def stop(self): + ''' + Stops the download. + ''' + if self.status == "downloading": + self.thread_shared_cmds['stop'] = "" + self._killed = True + + def pause(self): + ''' + Pauses the download. + ''' + if self.status == "downloading": + self.status = "paused" + self.thread_shared_cmds['pause'] = "" + + def resume(self): + ''' + Continues the download. same as unpause(). + ''' + self.unpause() + + def unpause(self): + ''' + Continues the download. same as resume(). + ''' + if self.status == "paused" and 'pause' in self.thread_shared_cmds: + self.status = "downloading" + del self.thread_shared_cmds['pause'] + + def limit_speed(self, speed): + ''' + Limits the download transfer speed. + + :param speed: Speed in bytes per download per second. Negative values will not limit the speed. Default is `-1`. + :type speed: int + ''' + if self.status == "downloading": + if speed == 0: + self.pause() + else: + self.unpause() + + if speed > 0: + self.thread_shared_cmds['limit'] = speed/self.threads_count + elif 'limit' in self.thread_shared_cmds: + del self.thread_shared_cmds['limit'] + + def get_dest(self): + ''' + Get the destination path of the downloaded file. Needed when no + destination is provided to the class, and exists on a temp folder. + + :rtype: string + ''' + return self.dest + def get_dl_time(self, human=False): + ''' + Returns how much time did the download take, in seconds. Returns + `-1` if the download task is not finished yet. + + :param human: If true, returns a human-readable formatted string. 
Else, returns an int type number + :type human: bool + :rtype: int/string + ''' + if not self.control_thread: + return 0 + if human: + return utils.time_human(self.control_thread.get_dl_time()) + return self.control_thread.get_dl_time() + + def get_dl_size(self, human=False): + ''' + Get downloaded bytes counter in bytes. + + :param human: If true, returns a human-readable formatted string. Else, returns an int type number + :type human: bool + :rtype: int/string + ''' + if not self.control_thread: + return 0 + if human: + return utils.sizeof_human(self.control_thread.get_dl_size()) + return self.control_thread.get_dl_size() + + def get_final_filesize(self, human=False): + ''' + Get total download size in bytes. + + :param human: If true, returns a human-readable formatted string. Else, returns an int type number + :type human: bool + :rtype: int/string + ''' + if not self.control_thread: + return 0 + if human: + return utils.sizeof_human(self.control_thread.get_final_filesize()) + return self.control_thread.get_final_filesize() + + + def get_data(self, binary=False, bytes=-1): + ''' + Returns the downloaded data. Will raise `RuntimeError` if it's + called when the download task is not finished yet. + + :param binary: If true, will read the data as binary. Else, will read it as text. + :type binary: bool + :param bytes: Number of bytes to read. Negative values will read until EOF. Default is `-1`. + :type bytes: int + :rtype: string + ''' + if self.status != 'finished': + raise RuntimeError("The download task must be finished in order to read the data. (current status is %s)" % self.status) + + flags = 'rb' if binary else 'r' + with open(self.get_dest(), flags) as f: + data = f.read(bytes) if bytes>0 else f.read() + return data + + def get_data_hash(self, algorithm): + ''' + Returns the downloaded data's hash. Will raise `RuntimeError` if it's + called when the download task is not finished yet. + + :param algorithm: Hashing algorithm. 
+ :type algorithm: bool + :rtype: string + + .. WARNING:: + The hashing algorithm must be supported on your system, as documented at `hashlib documentation page `_. + ''' + return hashlib.new(algorithm, self.get_data(binary=True)).hexdigest() + + def get_json(self): + ''' + Returns the JSON in the downloaded data. Will raise `RuntimeError` if it's + called when the download task is not finished yet. Will raise `json.decoder.JSONDecodeError` + if the downloaded data is not valid JSON. + + :rtype: dict + ''' + data = self.get_data() + return json.loads(data) + +def post_threadpool_actions(pool, args, expected_filesize, SmartDLObj): + "Run function after thread pool is done. Run this in a thread." + while not pool.done(): + time.sleep(0.1) + + if SmartDLObj._killed: + return + + if pool.get_exception(): + for exc in pool.get_exceptions(): + SmartDLObj.logger.exception(exc) + + SmartDLObj.retry(str(pool.get_exception())) + + if SmartDLObj._failed: + SmartDLObj.logger.warning("Task had errors. 
Exiting...") + return + + if expected_filesize: # if not zero, expected filesize is known + threads = len(args[0]) + total_filesize = sum([os.path.getsize(x) for x in args[0]]) + diff = math.fabs(expected_filesize - total_filesize) + + # if the difference is more than 4*thread numbers (because a thread may download 4KB extra per thread because of NTFS's block size) + if diff > 4*1024*threads: + errMsg = 'Diff between downloaded files and expected filesizes is {}B (filesize: {}, expected_filesize: {}, {} threads).'.format(total_filesize, expected_filesize, diff, threads) + SmartDLObj.logger.warning(errMsg) + SmartDLObj.retry(errMsg) + return + + SmartDLObj.status = "combining" + utils.combine_files(*args) + + if SmartDLObj.verify_hash: + dest_path = args[-1] + hash_ = utils.get_file_hash(SmartDLObj.hash_algorithm, dest_path) + + if hash_ == SmartDLObj.hash_code: + SmartDLObj.logger.info('Hash verification succeeded.') + else: + SmartDLObj.logger.warning('Hash verification failed.') + SmartDLObj.try_next_mirror(HashFailedException(os.path.basename(dest_path), hash, SmartDLObj.hash_code)) diff --git a/template/pySmartDL/utils.py b/template/pySmartDL/utils.py new file mode 100644 index 0000000..a94f2c2 --- /dev/null +++ b/template/pySmartDL/utils.py @@ -0,0 +1,370 @@ +# -*- coding: utf-8 -*- +''' +The Utils class contains many functions for project-wide use. +''' + +import os +import sys +import urllib.request, urllib.parse, urllib.error +import random +import logging +import re +import hashlib +from concurrent import futures +from math import log, ceil +import shutil + +DEFAULT_LOGGER_CREATED = False + +def combine_files(parts, dest, chunkSize = 1024 * 1024 * 4): + ''' + Combines files. + + :param parts: Source files. + :type parts: list of strings + :param dest: Destination file. + :type dest: string + :param chunkSize: Fetching chunk size. 
# NOTE(review): the first visible line of this chunk opens with the tail of a
# definition whose header lies above it. That fragment is intentionally not
# reproduced here and must stay attached to its header.


def url_fix(s, charset='utf-8'):
    '''
    Percent-encodes unsafe characters (such as spaces) in the path and query
    of a user supplied URL, similar to what browsers do with typed addresses.

    >>> url_fix('http://de.wikipedia.org/wiki/Elf (Begriffsklärung)')
    'http://de.wikipedia.org/wiki/Elf%20%28Begriffskl%C3%A4rung%29'

    :param s: Url address.
    :type s: string
    :param charset: Currently unused by this implementation. Kept so that
        existing callers passing it keep working.
    :type charset: string
    :rtype: string

    (adapted from werkzeug.utils)
    '''
    scheme, netloc, path, query, fragment = urllib.parse.urlsplit(s)
    # '%' is kept safe in both parts so already-encoded sequences are not encoded twice.
    path = urllib.parse.quote(path, '/%')
    query = urllib.parse.quote_plus(query, ':&%=')
    return urllib.parse.urlunsplit((scheme, netloc, path, query, fragment))


def progress_bar(progress, length=20):
    '''
    Returns a textual progress bar.

    >>> progress_bar(0.6)
    '[##########--------]'

    :param progress: Number between 0 and 1 describes the progress. Values
        outside that range are clamped.
    :type progress: float
    :param length: The length of the progress bar in chars, brackets included. Default is 20.
    :type length: int
    :rtype: string
    '''
    inner = length - 2  # The brackets are 2 chars long.
    progress = min(1, max(0, progress))
    filled = int(progress * inner)
    return "[" + "#" * filled + "-" * (inner - filled) + "]"


def is_HTTPRange_supported(url, timeout=15):
    '''
    Checks if a server allows byte serving, by requesting the first four bytes
    with the Range HTTP request header and comparing the Content-Length of the
    answer with the size of the complete file.

    :param url: Url address.
    :type url: string
    :param timeout: Timeout in seconds. Default is 15.
    :type timeout: int
    :returns: True if the ranged answer is shorter than the whole file.
        False if the size is unknown, the probe fails or the server ignored the range.
    :rtype: bool
    '''
    url = url.replace(' ', '%20')

    fullsize = get_filesize(url, timeout=timeout)
    if not fullsize:
        return False

    request = urllib.request.Request(url, headers={'Range': 'bytes=0-3'})
    try:
        with urllib.request.urlopen(request, timeout=timeout) as response:
            partial_length = response.headers["Content-Length"]
    except (urllib.error.HTTPError, urllib.error.URLError):
        # A server that rejects the probe is treated like one without range
        # support, consistent with get_filesize() reporting 0 on errors.
        return False

    if partial_length is None:
        return False

    try:
        return int(partial_length) != fullsize
    except ValueError:
        return False


def get_filesize(url, timeout=15):
    '''
    Fetches file's size of a file over HTTP, as announced by the Content-Length header.

    :param url: Url address.
    :type url: string
    :param timeout: Timeout in seconds. Default is 15.
    :type timeout: int
    :returns: Size in bytes. 0 if the request fails or the header is missing or not a number.
    :rtype: int
    '''
    try:
        # The context manager closes the connection; only the headers are needed.
        with urllib.request.urlopen(url, timeout=timeout) as response:
            raw_length = response.headers["Content-Length"]
    except (IndexError, KeyError, TypeError, urllib.error.HTTPError, urllib.error.URLError):
        return 0

    try:
        return int(raw_length)
    except (TypeError, ValueError):
        # TypeError: header missing (None). ValueError: header is not numeric.
        return 0


def get_random_useragent():
    '''
    Returns a random popular user-agent.
    The list was last updated on 2020/09/19.

    :returns: user-agent
    :rtype: string
    '''
    user_agents = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.2 Safari/605.1.15",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:80.0) Gecko/20100101 Firefox/80.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:80.0) Gecko/20100101 Firefox/80.0",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36 Edg/85.0.564.44",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36 Edg/85.0.564.51",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:79.0) Gecko/20100101 Firefox/79.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.2 Safari/605.1.15",
    )
    return random.choice(user_agents)


def sizeof_human(num):
    '''
    Human-readable formatting for filesizes.

    >>> sizeof_human(175799789)
    '167.7 MB'

    :param num: Size in bytes. Negative values keep their sign.
    :type num: int

    :rtype: string
    '''
    # (unit, number of decimals shown for that unit)
    units = (('B', 0), ('kB', 0), ('MB', 1), ('GB', 2), ('TB', 2), ('PB', 2))

    if num == 0:
        return '0 bytes'
    if num == 1:
        return '1 byte'

    sign = '-' if num < 0 else ''
    quotient = abs(float(num))
    exponent = 0
    # Repeated division by 1024 is exact for floats, so the unit boundaries
    # cannot be missed through rounding. The largest unit absorbs the rest.
    while quotient >= 1024 and exponent < len(units) - 1:
        quotient /= 1024
        exponent += 1

    unit, num_decimals = units[exponent]
    return '{}{:,.{}f} {}'.format(sign, quotient, num_decimals, unit)


def time_human(duration, fmt_short=False, show_ms=False):
    '''
    Human-readable formatting for timing.

    >>> time_human(175799789)
    '6 years, 2 weeks, 4 days, 17 hours, 16 minutes, 29 seconds'
    >>> time_human(589, fmt_short=True)
    '9m49s'

    A month counts as 28 days and a year as 12 such months. The short format
    has no month unit, because `m` already stands for minutes. There the
    months are expressed in weeks.

    :param duration: Duration in seconds.
    :type duration: int/float
    :param fmt_short: Format as a short string (`47s` instead of `47 seconds`)
    :type fmt_short: bool
    :param show_ms: Specify milliseconds in the string.
    :type show_ms: bool
    :rtype: string
    '''
    ms = int(duration % 1 * 1000)
    duration = int(duration)
    if duration == 0 and (not show_ms or ms == 0):
        return "0s" if fmt_short else "0 seconds"

    # (length in seconds, singular name, plural name), biggest unit first.
    # Each name is stored next to its own interval so they cannot get out of step.
    if fmt_short:
        units = (
            (29030400, 'y', 'y'),
            (604800, 'w', 'w'),
            (86400, 'd', 'd'),
            (3600, 'h', 'h'),
            (60, 'm', 'm'),
            (1, 's', 's'),
        )
    else:
        units = (
            (29030400, 'year', 'years'),
            (2419200, 'month', 'months'),
            (604800, 'week', 'weeks'),
            (86400, 'day', 'days'),
            (3600, 'hour', 'hours'),
            (60, 'minute', 'minutes'),
            (1, 'second', 'seconds'),
        )

    parts = []
    for seconds, singular, plural in units:
        count = duration // seconds
        if count > 0:
            parts.append((count, singular if count == 1 else plural))
            duration -= count * seconds

    if show_ms and ms > 0:
        parts.append((ms, "ms" if fmt_short else "milliseconds"))

    if fmt_short:
        return "".join(f"{count}{label}" for count, label in parts)
    return ", ".join(f"{count} {label}" for count, label in parts)


def get_file_hash(algorithm, path):
    '''
    Calculates a file's hash. The file is read in blocks of 1 MB.

    .. WARNING::
        The hashing algorithm must be supported on your system, as documented
        in the hashlib documentation.

    :param algorithm: Hashing algorithm.
    :type algorithm: string
    :param path: The file path
    :type path: string
    :returns: Hexadecimal digest.
    :rtype: string
    '''
    hasher = hashlib.new(algorithm)
    block_size = 1 * 1024**2  # 1 MB

    with open(path, 'rb') as f:
        # read() returns b'' at the end of the file, which ends the iteration.
        for block in iter(lambda: f.read(block_size), b''):
            hasher.update(block)

    return hasher.hexdigest()


def calc_chunk_size(filesize, threads, minChunkFile):
    '''
    Calculates the byte chunks to download.

    The number of chunks is reduced until each one is at least `minChunkFile`
    big or only one chunk is left. The result can have fewer entries than
    `threads`. Ranges that would start behind the end of the file are not
    generated.

    :param filesize: filesize in bytes.
    :type filesize: int
    :param threads: Number of threads
    :type threads: int
    :param minChunkFile: Minimum chunk size
    :type minChunkFile: int
    :rtype: Array of (startByte,endByte) tuples. Both bytes are inclusive.
    '''
    if not filesize:
        return [(0, 0)]

    threads = max(1, threads)  # guards the divisions below
    while threads > 1 and ceil(filesize / threads) < minChunkFile:
        threads -= 1

    chunk = ceil(filesize / threads)
    last_byte = filesize - 1
    ranges = []
    pos = 0
    for _ in range(threads):
        if pos > last_byte:
            # Each range covers chunk+1 bytes, so the file can be used up
            # before every thread got a part.
            break
        ranges.append((pos, min(pos + chunk, last_byte)))
        pos += chunk + 1

    return ranges


def create_debugging_logger():
    '''
    Creates a debugging logger that prints to console. The console handler is
    only attached on the first call, tracked by the module level flag
    `DEFAULT_LOGGER_CREATED`.

    :rtype: `logging.Logger` instance
    '''
    global DEFAULT_LOGGER_CREATED

    t_log = logging.getLogger('pySmartDL')
    if DEFAULT_LOGGER_CREATED:
        return t_log

    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG)
    console.setFormatter(logging.Formatter('[%(levelname)s||%(thread)d@{%(pathname)s:%(lineno)d}] %(message)s'))

    t_log.setLevel(logging.DEBUG)
    t_log.addHandler(console)
    DEFAULT_LOGGER_CREATED = True

    return t_log


class DummyLogger(object):
    '''
    A dummy logger. You can call `debug()`, `warning()`, etc on this object, and nothing will happen.
    '''
    def __init__(self):
        pass

    def dummy_func(self, *args, **kargs):
        '''Accepts any arguments and does nothing.'''
        pass

    def __getattr__(self, name):
        # Only reached for attributes that do not exist. Unknown dunder names
        # must stay "missing", otherwise protocol probes would receive dummy_func.
        if name.startswith('__'):
            raise AttributeError(name)
        return self.dummy_func


class ManagedThreadPoolExecutor(futures.ThreadPoolExecutor):
    '''
    Managed Thread Pool Executor. A subclass of ThreadPoolExecutor that
    remembers every future it handed out.
    '''
    def __init__(self, max_workers):
        super().__init__(max_workers)
        self._futures = []

    def submit(self, fn, *args, **kwargs):
        '''Schedules `fn` like the parent class and keeps track of the returned future.'''
        future = super().submit(fn, *args, **kwargs)
        self._futures.append(future)
        return future

    def done(self):
        '''
        :returns: True if every submitted future is done. Also True if nothing was submitted.
        :rtype: bool
        '''
        return all(future.done() for future in self._futures)

    def get_exceptions(self):
        '''
        Return all the exceptions raised. `Future.exception()` is called
        without a timeout, so this waits for futures that are still running.

        :rtype: List of `Exception` instances
        '''
        raised = []
        for future in self._futures:
            error = future.exception()
            if error:
                raised.append(error)
        return raised

    def get_exception(self):
        '''
        Returns only the first exception. Returns None if no exception was raised.

        :rtype: `Exception` instance
        '''
        for future in self._futures:
            error = future.exception()
            if error:
                return error
        return None