You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
676 lines
25 KiB
676 lines
25 KiB
3 years ago
|
import os
|
||
|
import sys
|
||
|
import urllib.request, urllib.error, urllib.parse
|
||
|
import copy
|
||
|
import threading
|
||
|
import time
|
||
|
import math
|
||
|
import tempfile
|
||
|
import base64
|
||
|
import hashlib
|
||
|
import socket
|
||
|
import logging
|
||
|
from io import StringIO
|
||
|
import multiprocessing.dummy as multiprocessing
|
||
|
from ctypes import c_int
|
||
|
import json
|
||
|
import ssl
|
||
|
|
||
|
from . import utils
|
||
|
from .control_thread import ControlThread
|
||
|
from .download import download
|
||
|
|
||
|
__all__ = ['SmartDL', 'utils']
|
||
|
__version_mjaor__ = 1
|
||
|
__version_minor__ = 3
|
||
|
__version_micro__ = 4
|
||
|
__version__ = "{}.{}.{}".format(__version_mjaor__, __version_minor__, __version_micro__)
|
||
|
|
||
|
class HashFailedException(Exception):
|
||
|
"Raised when hash check fails."
|
||
|
def __init__(self, fn, calc_hash, needed_hash):
|
||
|
self.filename = fn
|
||
|
self.calculated_hash = calc_hash
|
||
|
self.needed_hash = needed_hash
|
||
|
def __str__(self):
|
||
|
return 'HashFailedException({}, got {}, expected {})'.format(self.filename, self.calculated_hash, self.needed_hash)
|
||
|
def __repr__(self):
|
||
|
return '<HashFailedException {}, got {}, expected {}>'.format(self.filename, self.calculated_hash, self.needed_hash)
|
||
|
|
||
|
class CanceledException(Exception):
|
||
|
"Raised when the job is canceled."
|
||
|
def __init__(self):
|
||
|
pass
|
||
|
def __str__(self):
|
||
|
return 'CanceledException'
|
||
|
def __repr__(self):
|
||
|
return "<CanceledException>"
|
||
|
|
||
|
class SmartDL:
|
||
|
'''
|
||
|
The main SmartDL class
|
||
|
|
||
|
:param urls: Download url. It is possible to pass unsafe and unicode characters. You can also pass a list of urls, and those will be used as mirrors.
|
||
|
:type urls: string or list of strings
|
||
|
:param dest: Destination path. Default is `%TEMP%/pySmartDL/`.
|
||
|
:type dest: string
|
||
|
:param progress_bar: If True, prints a progress bar to the `stdout stream <http://docs.python.org/2/library/sys.html#sys.stdout>`_. Default is `True`.
|
||
|
:type progress_bar: bool
|
||
|
:param fix_urls: If true, attempts to fix urls with unsafe characters.
|
||
|
:type fix_urls: bool
|
||
|
:param threads: Number of threads to use.
|
||
|
:type threads: int
|
||
|
:param timeout: Timeout for network operations, in seconds. Default is 5.
|
||
|
:type timeout: int
|
||
|
:param logger: An optional logger.
|
||
|
:type logger: `logging.Logger` instance
|
||
|
:param connect_default_logger: If true, connects a default logger to the class.
|
||
|
:type connect_default_logger: bool
|
||
|
:param request_args: Arguments to be passed to a new urllib.request.Request instance in dictionary form. See `urllib.request docs <https://docs.python.org/3/library/urllib.request.html#urllib.request.Request>`_ for options.
|
||
|
:type request_args: dict
|
||
|
:rtype: `SmartDL` instance
|
||
|
:param verify: If ssl certificates should be validated.
|
||
|
:type verify: bool
|
||
|
|
||
|
.. NOTE::
|
||
|
The provided dest may be a folder or a full path name (including filename). The workflow is:
|
||
|
|
||
|
* If the path exists, and it's an existing folder, the file will be downloaded to there with the original filename.
|
||
|
* If the past does not exist, it will create the folders, if needed, and refer to the last section of the path as the filename.
|
||
|
* If you want to download to folder that does not exist at the moment, and want the module to fill in the filename, make sure the path ends with `os.sep`.
|
||
|
* If no path is provided, `%TEMP%/pySmartDL/` will be used.
|
||
|
'''
|
||
|
|
||
|
def __init__(self, urls, dest=None, progress_bar=True, fix_urls=True, threads=5, timeout=5, logger=None, connect_default_logger=False, request_args=None, verify=True):
|
||
|
if logger:
|
||
|
self.logger = logger
|
||
|
elif connect_default_logger:
|
||
|
self.logger = utils.create_debugging_logger()
|
||
|
else:
|
||
|
self.logger = utils.DummyLogger()
|
||
|
if request_args:
|
||
|
if "headers" not in request_args:
|
||
|
request_args["headers"] = dict()
|
||
|
self.requestArgs = request_args
|
||
|
else:
|
||
|
self.requestArgs = {"headers": dict()}
|
||
|
if "User-Agent" not in self.requestArgs["headers"]:
|
||
|
self.requestArgs["headers"]["User-Agent"] = utils.get_random_useragent()
|
||
|
self.mirrors = [urls] if isinstance(urls, str) else urls
|
||
|
if fix_urls:
|
||
|
self.mirrors = [utils.url_fix(x) for x in self.mirrors]
|
||
|
self.url = self.mirrors.pop(0)
|
||
|
self.logger.info('Using url "{}"'.format(self.url))
|
||
|
|
||
|
fn = urllib.parse.unquote(os.path.basename(urllib.parse.urlparse(self.url).path))
|
||
|
self.dest = dest or os.path.join(tempfile.gettempdir(), 'pySmartDL', fn)
|
||
|
if self.dest[-1] == os.sep:
|
||
|
if os.path.exists(self.dest[:-1]) and os.path.isfile(self.dest[:-1]):
|
||
|
os.unlink(self.dest[:-1])
|
||
|
self.dest += fn
|
||
|
if os.path.isdir(self.dest):
|
||
|
self.dest = os.path.join(self.dest, fn)
|
||
|
|
||
|
self.progress_bar = progress_bar
|
||
|
self.threads_count = threads
|
||
|
self.timeout = timeout
|
||
|
self.current_attemp = 1
|
||
|
self.attemps_limit = 4
|
||
|
self.minChunkFile = 1024**2*2 # 2MB
|
||
|
self.filesize = 0
|
||
|
self.shared_var = multiprocessing.Value(c_int, 0) # a ctypes var that counts the bytes already downloaded
|
||
|
self.thread_shared_cmds = {}
|
||
|
self.status = "ready"
|
||
|
self.verify_hash = False
|
||
|
self._killed = False
|
||
|
self._failed = False
|
||
|
self._start_func_blocking = True
|
||
|
self.errors = []
|
||
|
|
||
|
self.post_threadpool_thread = None
|
||
|
self.control_thread = None
|
||
|
|
||
|
if not os.path.exists(os.path.dirname(self.dest)):
|
||
|
self.logger.info('Folder "{}" does not exist. Creating...'.format(os.path.dirname(self.dest)))
|
||
|
os.makedirs(os.path.dirname(self.dest))
|
||
|
if not utils.is_HTTPRange_supported(self.url, timeout=self.timeout):
|
||
|
self.logger.warning("Server does not support HTTPRange. threads_count is set to 1.")
|
||
|
self.threads_count = 1
|
||
|
if os.path.exists(self.dest):
|
||
|
self.logger.warning('Destination "{}" already exists. Existing file will be removed.'.format(self.dest))
|
||
|
if not os.path.exists(os.path.dirname(self.dest)):
|
||
|
self.logger.warning('Directory "{}" does not exist. Creating it...'.format(os.path.dirname(self.dest)))
|
||
|
os.makedirs(os.path.dirname(self.dest))
|
||
|
|
||
|
self.logger.info("Creating a ThreadPool of {} thread(s).".format(self.threads_count))
|
||
|
self.pool = utils.ManagedThreadPoolExecutor(self.threads_count)
|
||
|
|
||
|
if verify:
|
||
|
self.context = None
|
||
|
else:
|
||
|
self.context = ssl.create_default_context()
|
||
|
self.context.check_hostname = False
|
||
|
self.context.verify_mode = ssl.CERT_NONE
|
||
|
|
||
|
def __str__(self):
|
||
|
return 'SmartDL(r"{}", dest=r"{}")'.format(self.url, self.dest)
|
||
|
|
||
|
def __repr__(self):
|
||
|
return "<SmartDL {}>".format(self.url)
|
||
|
|
||
|
def add_basic_authentication(self, username, password):
|
||
|
'''
|
||
|
Uses HTTP Basic Access authentication for the connection.
|
||
|
|
||
|
:param username: Username.
|
||
|
:type username: string
|
||
|
:param password: Password.
|
||
|
:type password: string
|
||
|
'''
|
||
|
auth_string = '{}:{}'.format(username, password)
|
||
|
base64string = base64.standard_b64encode(auth_string.encode('utf-8'))
|
||
|
self.requestArgs['headers']['Authorization'] = b"Basic " + base64string
|
||
|
|
||
|
def add_hash_verification(self, algorithm, hash):
|
||
|
'''
|
||
|
Adds hash verification to the download.
|
||
|
|
||
|
If hash is not correct, will try different mirrors. If all mirrors aren't
|
||
|
passing hash verification, `HashFailedException` Exception will be raised.
|
||
|
|
||
|
.. NOTE::
|
||
|
If downloaded file already exist on the destination, and hash matches, pySmartDL will not download it again.
|
||
|
|
||
|
.. WARNING::
|
||
|
The hashing algorithm must be supported on your system, as documented at `hashlib documentation page <http://docs.python.org/3/library/hashlib.html>`_.
|
||
|
|
||
|
:param algorithm: Hashing algorithm.
|
||
|
:type algorithm: string
|
||
|
:param hash: Hash code.
|
||
|
:type hash: string
|
||
|
'''
|
||
|
|
||
|
self.verify_hash = True
|
||
|
self.hash_algorithm = algorithm
|
||
|
self.hash_code = hash
|
||
|
|
||
|
def fetch_hash_sums(self):
|
||
|
'''
|
||
|
Will attempt to fetch UNIX hash sums files (`SHA256SUMS`, `SHA1SUMS` or `MD5SUMS` files in
|
||
|
the same url directory).
|
||
|
|
||
|
Calls `self.add_hash_verification` if successful. Returns if a matching hash was found.
|
||
|
|
||
|
:rtype: bool
|
||
|
|
||
|
*New in 1.2.1*
|
||
|
'''
|
||
|
default_sums_filenames = ['SHA256SUMS', 'SHA1SUMS', 'MD5SUMS']
|
||
|
folder = os.path.dirname(self.url)
|
||
|
orig_basename = os.path.basename(self.url)
|
||
|
|
||
|
self.logger.info("Looking for SUMS files...")
|
||
|
for filename in default_sums_filenames:
|
||
|
try:
|
||
|
sums_url = "%s/%s" % (folder, filename)
|
||
|
sumsRequest = urllib.request.Request(sums_url, **self.requestArgs)
|
||
|
obj = urllib.request.urlopen(sumsRequest)
|
||
|
data = obj.read().split('\n')
|
||
|
obj.close()
|
||
|
|
||
|
for line in data:
|
||
|
if orig_basename.lower() in line.lower():
|
||
|
self.logger.info("Found a matching hash in %s" % sums_url)
|
||
|
algo = filename.rstrip('SUMS')
|
||
|
hash = line.split(' ')[0]
|
||
|
self.add_hash_verification(algo, hash)
|
||
|
return
|
||
|
|
||
|
except urllib.error.HTTPError:
|
||
|
continue
|
||
|
|
||
|
def start(self, blocking=None):
|
||
|
'''
|
||
|
Starts the download task. Will raise `RuntimeError` if it's the object's already downloading.
|
||
|
|
||
|
.. warning::
|
||
|
If you're using the non-blocking mode, Exceptions won't be raised. In that case, call
|
||
|
`isSuccessful()` after the task is finished, to make sure the download succeeded. Call
|
||
|
`get_errors()` to get the the exceptions.
|
||
|
|
||
|
:param blocking: If true, calling this function will block the thread until the download finished. Default is *True*.
|
||
|
:type blocking: bool
|
||
|
'''
|
||
|
if not self.status == "ready":
|
||
|
raise RuntimeError("cannot start (current status is {})".format(self.status))
|
||
|
self.logger.info('Starting a new SmartDL operation.')
|
||
|
|
||
|
if blocking is None:
|
||
|
blocking = self._start_func_blocking
|
||
|
else:
|
||
|
self._start_func_blocking = blocking
|
||
|
|
||
|
if self.mirrors:
|
||
|
self.logger.info('One URL and {} mirrors are loaded.'.format(len(self.mirrors)))
|
||
|
else:
|
||
|
self.logger.info('One URL is loaded.')
|
||
|
|
||
|
if self.verify_hash and os.path.exists(self.dest):
|
||
|
if utils.get_file_hash(self.hash_algorithm, self.dest) == self.hash_code:
|
||
|
self.logger.info("Destination '%s' already exists, and the hash matches. No need to download." % self.dest)
|
||
|
self.status = 'finished'
|
||
|
return
|
||
|
|
||
|
self.logger.info("Downloading '{}' to '{}'...".format(self.url, self.dest))
|
||
|
req = urllib.request.Request(self.url, **self.requestArgs)
|
||
|
try:
|
||
|
urlObj = urllib.request.urlopen(req, timeout=self.timeout, context=self.context)
|
||
|
except (urllib.error.HTTPError, urllib.error.URLError, socket.timeout) as e:
|
||
|
self.errors.append(e)
|
||
|
if self.mirrors:
|
||
|
self.logger.info("{} Trying next mirror...".format(str(e)))
|
||
|
self.url = self.mirrors.pop(0)
|
||
|
self.logger.info('Using url "{}"'.format(self.url))
|
||
|
self.start(blocking)
|
||
|
return
|
||
|
else:
|
||
|
self.logger.warning(str(e))
|
||
|
self.errors.append(e)
|
||
|
self._failed = True
|
||
|
self.status = "finished"
|
||
|
raise
|
||
|
|
||
|
try:
|
||
|
self.filesize = int(urlObj.headers["Content-Length"])
|
||
|
self.logger.info("Content-Length is {} ({}).".format(self.filesize, utils.sizeof_human(self.filesize)))
|
||
|
except (IndexError, KeyError, TypeError):
|
||
|
self.logger.warning("Server did not send Content-Length. Filesize is unknown.")
|
||
|
self.filesize = 0
|
||
|
|
||
|
args = utils.calc_chunk_size(self.filesize, self.threads_count, self.minChunkFile)
|
||
|
bytes_per_thread = args[0][1] - args[0][0] + 1
|
||
|
if len(args)>1:
|
||
|
self.logger.info("Launching {} threads (downloads {}/thread).".format(len(args), utils.sizeof_human(bytes_per_thread)))
|
||
|
else:
|
||
|
self.logger.info("Launching 1 thread (downloads {}).".format(utils.sizeof_human(bytes_per_thread)))
|
||
|
|
||
|
self.status = "downloading"
|
||
|
|
||
|
for i, arg in enumerate(args):
|
||
|
req = self.pool.submit(
|
||
|
download,
|
||
|
self.url,
|
||
|
self.dest+".%.3d" % i,
|
||
|
self.requestArgs,
|
||
|
self.context,
|
||
|
arg[0],
|
||
|
arg[1],
|
||
|
self.timeout,
|
||
|
self.shared_var,
|
||
|
self.thread_shared_cmds,
|
||
|
self.logger
|
||
|
)
|
||
|
|
||
|
self.post_threadpool_thread = threading.Thread(
|
||
|
target=post_threadpool_actions,
|
||
|
args=(
|
||
|
self.pool,
|
||
|
[[(self.dest+".%.3d" % i) for i in range(len(args))], self.dest],
|
||
|
self.filesize,
|
||
|
self
|
||
|
)
|
||
|
)
|
||
|
self.post_threadpool_thread.daemon = True
|
||
|
self.post_threadpool_thread.start()
|
||
|
|
||
|
self.control_thread = ControlThread(self)
|
||
|
|
||
|
if blocking:
|
||
|
self.wait(raise_exceptions=True)
|
||
|
|
||
|
def _exc_callback(self, req, e):
|
||
|
self.errors.append(e[0])
|
||
|
self.logger.exception(e[1])
|
||
|
|
||
|
def retry(self, eStr=""):
|
||
|
if self.current_attemp < self.attemps_limit:
|
||
|
self.current_attemp += 1
|
||
|
self.status = "ready"
|
||
|
self.shared_var.value = 0
|
||
|
self.thread_shared_cmds = {}
|
||
|
self.start()
|
||
|
|
||
|
else:
|
||
|
s = 'The maximum retry attempts reached'
|
||
|
if eStr:
|
||
|
s += " ({})".format(eStr)
|
||
|
self.errors.append(urllib.error.HTTPError(self.url, "0", s, {}, StringIO()))
|
||
|
self._failed = True
|
||
|
|
||
|
def try_next_mirror(self, e=None):
|
||
|
if self.mirrors:
|
||
|
if e:
|
||
|
self.errors.append(e)
|
||
|
self.status = "ready"
|
||
|
self.shared_var.value = 0
|
||
|
self.url = self.mirrors.pop(0)
|
||
|
self.logger.info('Using url "{}"'.format(self.url))
|
||
|
self.start()
|
||
|
else:
|
||
|
self._failed = True
|
||
|
self.errors.append(e)
|
||
|
|
||
|
def get_eta(self, human=False):
|
||
|
'''
|
||
|
Get estimated time of download completion, in seconds. Returns `0` if there is
|
||
|
no enough data to calculate the estimated time (this will happen on the approx.
|
||
|
first 5 seconds of each download).
|
||
|
|
||
|
:param human: If true, returns a human-readable formatted string. Else, returns an int type number
|
||
|
:type human: bool
|
||
|
:rtype: int/string
|
||
|
'''
|
||
|
if human:
|
||
|
s = utils.time_human(self.control_thread.get_eta())
|
||
|
return s if s else "TBD"
|
||
|
return self.control_thread.get_eta()
|
||
|
|
||
|
def get_speed(self, human=False):
|
||
|
'''
|
||
|
Get current transfer speed in bytes per second.
|
||
|
|
||
|
:param human: If true, returns a human-readable formatted string. Else, returns an int type number
|
||
|
:type human: bool
|
||
|
:rtype: int/string
|
||
|
'''
|
||
|
if human:
|
||
|
return "{}/s".format(utils.sizeof_human(self.control_thread.get_speed()))
|
||
|
return self.control_thread.get_speed()
|
||
|
|
||
|
def get_progress(self):
|
||
|
'''
|
||
|
Returns the current progress of the download, as a float between `0` and `1`.
|
||
|
|
||
|
:rtype: float
|
||
|
'''
|
||
|
if not self.filesize:
|
||
|
return 0
|
||
|
if self.control_thread.get_dl_size() <= self.filesize:
|
||
|
return 1.0*self.control_thread.get_dl_size()/self.filesize
|
||
|
return 1.0
|
||
|
|
||
|
def get_progress_bar(self, length=20):
|
||
|
'''
|
||
|
Returns the current progress of the download as a string containing a progress bar.
|
||
|
|
||
|
.. NOTE::
|
||
|
That's an alias for pySmartDL.utils.progress_bar(obj.get_progress()).
|
||
|
|
||
|
:param length: The length of the progress bar in chars. Default is 20.
|
||
|
:type length: int
|
||
|
:rtype: string
|
||
|
'''
|
||
|
return utils.progress_bar(self.get_progress(), length)
|
||
|
|
||
|
def isFinished(self):
|
||
|
'''
|
||
|
Returns if the task is finished.
|
||
|
|
||
|
:rtype: bool
|
||
|
'''
|
||
|
if self.status == "ready":
|
||
|
return False
|
||
|
if self.status == "finished":
|
||
|
return True
|
||
|
return not self.post_threadpool_thread.is_alive()
|
||
|
|
||
|
def isSuccessful(self):
|
||
|
'''
|
||
|
Returns if the download is successfull. It may fail in the following scenarios:
|
||
|
|
||
|
- Hash check is enabled and fails.
|
||
|
- All mirrors are down.
|
||
|
- Any local I/O problems (such as `no disk space available`).
|
||
|
|
||
|
.. NOTE::
|
||
|
Call `get_errors()` to get the exceptions, if any.
|
||
|
|
||
|
Will raise `RuntimeError` if it's called when the download task is not finished yet.
|
||
|
|
||
|
:rtype: bool
|
||
|
'''
|
||
|
|
||
|
if self._killed:
|
||
|
return False
|
||
|
|
||
|
n = 0
|
||
|
while self.status != 'finished':
|
||
|
n += 1
|
||
|
time.sleep(0.1)
|
||
|
if n >= 15:
|
||
|
raise RuntimeError("The download task must be finished in order to see if it's successful. (current status is {})".format(self.status))
|
||
|
|
||
|
return not self._failed
|
||
|
|
||
|
def get_errors(self):
|
||
|
'''
|
||
|
Get errors happened while downloading.
|
||
|
|
||
|
:rtype: list of `Exception` instances
|
||
|
'''
|
||
|
return self.errors
|
||
|
|
||
|
def get_status(self):
|
||
|
'''
|
||
|
Returns the current status of the task. Possible values: *ready*,
|
||
|
*downloading*, *paused*, *combining*, *finished*.
|
||
|
|
||
|
:rtype: string
|
||
|
'''
|
||
|
return self.status
|
||
|
|
||
|
def wait(self, raise_exceptions=False):
|
||
|
'''
|
||
|
Blocks until the download is finished.
|
||
|
|
||
|
:param raise_exceptions: If true, this function will raise exceptions. Default is *False*.
|
||
|
:type raise_exceptions: bool
|
||
|
'''
|
||
|
if self.status in ["ready", "finished"]:
|
||
|
return
|
||
|
|
||
|
while not self.isFinished():
|
||
|
time.sleep(0.1)
|
||
|
self.post_threadpool_thread.join()
|
||
|
self.control_thread.join()
|
||
|
|
||
|
if self._failed and raise_exceptions:
|
||
|
raise self.errors[-1]
|
||
|
|
||
|
def stop(self):
|
||
|
'''
|
||
|
Stops the download.
|
||
|
'''
|
||
|
if self.status == "downloading":
|
||
|
self.thread_shared_cmds['stop'] = ""
|
||
|
self._killed = True
|
||
|
|
||
|
def pause(self):
|
||
|
'''
|
||
|
Pauses the download.
|
||
|
'''
|
||
|
if self.status == "downloading":
|
||
|
self.status = "paused"
|
||
|
self.thread_shared_cmds['pause'] = ""
|
||
|
|
||
|
def resume(self):
|
||
|
'''
|
||
|
Continues the download. same as unpause().
|
||
|
'''
|
||
|
self.unpause()
|
||
|
|
||
|
def unpause(self):
|
||
|
'''
|
||
|
Continues the download. same as resume().
|
||
|
'''
|
||
|
if self.status == "paused" and 'pause' in self.thread_shared_cmds:
|
||
|
self.status = "downloading"
|
||
|
del self.thread_shared_cmds['pause']
|
||
|
|
||
|
def limit_speed(self, speed):
|
||
|
'''
|
||
|
Limits the download transfer speed.
|
||
|
|
||
|
:param speed: Speed in bytes per download per second. Negative values will not limit the speed. Default is `-1`.
|
||
|
:type speed: int
|
||
|
'''
|
||
|
if self.status == "downloading":
|
||
|
if speed == 0:
|
||
|
self.pause()
|
||
|
else:
|
||
|
self.unpause()
|
||
|
|
||
|
if speed > 0:
|
||
|
self.thread_shared_cmds['limit'] = speed/self.threads_count
|
||
|
elif 'limit' in self.thread_shared_cmds:
|
||
|
del self.thread_shared_cmds['limit']
|
||
|
|
||
|
def get_dest(self):
|
||
|
'''
|
||
|
Get the destination path of the downloaded file. Needed when no
|
||
|
destination is provided to the class, and exists on a temp folder.
|
||
|
|
||
|
:rtype: string
|
||
|
'''
|
||
|
return self.dest
|
||
|
def get_dl_time(self, human=False):
|
||
|
'''
|
||
|
Returns how much time did the download take, in seconds. Returns
|
||
|
`-1` if the download task is not finished yet.
|
||
|
|
||
|
:param human: If true, returns a human-readable formatted string. Else, returns an int type number
|
||
|
:type human: bool
|
||
|
:rtype: int/string
|
||
|
'''
|
||
|
if not self.control_thread:
|
||
|
return 0
|
||
|
if human:
|
||
|
return utils.time_human(self.control_thread.get_dl_time())
|
||
|
return self.control_thread.get_dl_time()
|
||
|
|
||
|
def get_dl_size(self, human=False):
|
||
|
'''
|
||
|
Get downloaded bytes counter in bytes.
|
||
|
|
||
|
:param human: If true, returns a human-readable formatted string. Else, returns an int type number
|
||
|
:type human: bool
|
||
|
:rtype: int/string
|
||
|
'''
|
||
|
if not self.control_thread:
|
||
|
return 0
|
||
|
if human:
|
||
|
return utils.sizeof_human(self.control_thread.get_dl_size())
|
||
|
return self.control_thread.get_dl_size()
|
||
|
|
||
|
def get_final_filesize(self, human=False):
|
||
|
'''
|
||
|
Get total download size in bytes.
|
||
|
|
||
|
:param human: If true, returns a human-readable formatted string. Else, returns an int type number
|
||
|
:type human: bool
|
||
|
:rtype: int/string
|
||
|
'''
|
||
|
if not self.control_thread:
|
||
|
return 0
|
||
|
if human:
|
||
|
return utils.sizeof_human(self.control_thread.get_final_filesize())
|
||
|
return self.control_thread.get_final_filesize()
|
||
|
|
||
|
|
||
|
def get_data(self, binary=False, bytes=-1):
|
||
|
'''
|
||
|
Returns the downloaded data. Will raise `RuntimeError` if it's
|
||
|
called when the download task is not finished yet.
|
||
|
|
||
|
:param binary: If true, will read the data as binary. Else, will read it as text.
|
||
|
:type binary: bool
|
||
|
:param bytes: Number of bytes to read. Negative values will read until EOF. Default is `-1`.
|
||
|
:type bytes: int
|
||
|
:rtype: string
|
||
|
'''
|
||
|
if self.status != 'finished':
|
||
|
raise RuntimeError("The download task must be finished in order to read the data. (current status is %s)" % self.status)
|
||
|
|
||
|
flags = 'rb' if binary else 'r'
|
||
|
with open(self.get_dest(), flags) as f:
|
||
|
data = f.read(bytes) if bytes>0 else f.read()
|
||
|
return data
|
||
|
|
||
|
def get_data_hash(self, algorithm):
|
||
|
'''
|
||
|
Returns the downloaded data's hash. Will raise `RuntimeError` if it's
|
||
|
called when the download task is not finished yet.
|
||
|
|
||
|
:param algorithm: Hashing algorithm.
|
||
|
:type algorithm: bool
|
||
|
:rtype: string
|
||
|
|
||
|
.. WARNING::
|
||
|
The hashing algorithm must be supported on your system, as documented at `hashlib documentation page <http://docs.python.org/3/library/hashlib.html>`_.
|
||
|
'''
|
||
|
return hashlib.new(algorithm, self.get_data(binary=True)).hexdigest()
|
||
|
|
||
|
def get_json(self):
|
||
|
'''
|
||
|
Returns the JSON in the downloaded data. Will raise `RuntimeError` if it's
|
||
|
called when the download task is not finished yet. Will raise `json.decoder.JSONDecodeError`
|
||
|
if the downloaded data is not valid JSON.
|
||
|
|
||
|
:rtype: dict
|
||
|
'''
|
||
|
data = self.get_data()
|
||
|
return json.loads(data)
|
||
|
|
||
|
def post_threadpool_actions(pool, args, expected_filesize, SmartDLObj):
|
||
|
"Run function after thread pool is done. Run this in a thread."
|
||
|
while not pool.done():
|
||
|
time.sleep(0.1)
|
||
|
|
||
|
if SmartDLObj._killed:
|
||
|
return
|
||
|
|
||
|
if pool.get_exception():
|
||
|
for exc in pool.get_exceptions():
|
||
|
SmartDLObj.logger.exception(exc)
|
||
|
|
||
|
SmartDLObj.retry(str(pool.get_exception()))
|
||
|
|
||
|
if SmartDLObj._failed:
|
||
|
SmartDLObj.logger.warning("Task had errors. Exiting...")
|
||
|
return
|
||
|
|
||
|
if expected_filesize: # if not zero, expected filesize is known
|
||
|
threads = len(args[0])
|
||
|
total_filesize = sum([os.path.getsize(x) for x in args[0]])
|
||
|
diff = math.fabs(expected_filesize - total_filesize)
|
||
|
|
||
|
# if the difference is more than 4*thread numbers (because a thread may download 4KB extra per thread because of NTFS's block size)
|
||
|
if diff > 4*1024*threads:
|
||
|
errMsg = 'Diff between downloaded files and expected filesizes is {}B (filesize: {}, expected_filesize: {}, {} threads).'.format(total_filesize, expected_filesize, diff, threads)
|
||
|
SmartDLObj.logger.warning(errMsg)
|
||
|
SmartDLObj.retry(errMsg)
|
||
|
return
|
||
|
|
||
|
SmartDLObj.status = "combining"
|
||
|
utils.combine_files(*args)
|
||
|
|
||
|
if SmartDLObj.verify_hash:
|
||
|
dest_path = args[-1]
|
||
|
hash_ = utils.get_file_hash(SmartDLObj.hash_algorithm, dest_path)
|
||
|
|
||
|
if hash_ == SmartDLObj.hash_code:
|
||
|
SmartDLObj.logger.info('Hash verification succeeded.')
|
||
|
else:
|
||
|
SmartDLObj.logger.warning('Hash verification failed.')
|
||
|
SmartDLObj.try_next_mirror(HashFailedException(os.path.basename(dest_path), hash, SmartDLObj.hash_code))
|