Nils
3 years ago
14 changed files with 1754 additions and 63 deletions
Binary file not shown.
@ -0,0 +1,24 @@ |
|||
This is free and unencumbered software released into the public domain. |
|||
|
|||
Anyone is free to copy, modify, publish, use, compile, sell, or |
|||
distribute this software, either in source code form or as a compiled |
|||
binary, for any purpose, commercial or non-commercial, and by any |
|||
means. |
|||
|
|||
In jurisdictions that recognize copyright laws, the author or authors |
|||
of this software dedicate any and all copyright interest in the |
|||
software to the public domain. We make this dedication for the benefit |
|||
of the public at large and to the detriment of our heirs and |
|||
successors. We intend this dedication to be an overt act of |
|||
relinquishment in perpetuity of all present and future rights to this |
|||
software under copyright law. |
|||
|
|||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
|||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
|||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
|||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
|||
OTHER DEALINGS IN THE SOFTWARE. |
|||
|
|||
For more information, please refer to <http://unlicense.org/> |
@ -0,0 +1,59 @@ |
|||
Python Smart Download Manager -- pySmartDL |
|||
========================================== |
|||
|
|||
``pySmartDL`` strives to be a full-fledged smart download manager for Python. Main features: |
|||
|
|||
* Built-in download acceleration (with the `multipart downloading technique <http://stackoverflow.com/questions/93642/how-do-download-accelerators-work>`_). |
|||
* Mirrors support. |
|||
* Pause/Unpause feature. |
|||
* Speed limiting feature. |
|||
* Hash checking. |
|||
* Non-blocking, shows progress bar, download speed and eta. |
|||
* Full support for custom headers and methods. |
|||
* Python 3 Support |
|||
|
|||
Project Links |
|||
============= |
|||
|
|||
* Downloads: http://pypi.python.org/pypi/pySmartDL/ |
|||
* Documentation: http://itaybb.github.io/pySmartDL/ |
|||
* Project page: https://github.com/iTaybb/pySmartDL/ |
|||
* Bugs and Issues: https://github.com/iTaybb/pySmartDL/issues |
|||
|
|||
Installation |
|||
============ |
|||
|
|||
**Using pip (recommended way)** |
|||
|
|||
Make sure python-pip is installed on your system. If you are using virtualenv, then pip is already installed into environments created by virtualenv. Run pip to install pySmartDL: |
|||
|
|||
``pip install pySmartDL`` |
|||
|
|||
**From Source** |
|||
|
|||
The pySmartDL package is installed from source using distutils in the usual way. Download the `source distribution <http://pypi.python.org/pypi/pySmartDL>`_ first. Unpack the source zip and run the following to install the package site-wide: |
|||
|
|||
``python setup.py install`` |
|||
|
|||
Usage |
|||
===== |
|||
|
|||
Download is as simple as creating an instance and starting it: |
|||
|
|||
from pySmartDL import SmartDL |
|||
|
|||
url = "https://github.com/iTaybb/pySmartDL/raw/master/test/7za920.zip" |
|||
dest = "C:\\Downloads\\" # or '~/Downloads/' on linux |
|||
|
|||
obj = SmartDL(url, dest) |
|||
obj.start() |
|||
# [*] 0.23 Mb / 0.37 Mb @ 88.00Kb/s [##########--------] [60%, 2s left] |
|||
|
|||
path = obj.get_dest() |
|||
|
|||
Requirements |
|||
============== |
|||
|
|||
* Python 3.4 or greater. |
|||
|
|||
Copyright (C) 2014-2020 Itay Brandes. |
@ -0,0 +1,4 @@ |
|||
from .pySmartDL import SmartDL, HashFailedException, CanceledException
from . import utils

# Re-export the package version explicitly. The original read
# `pySmartDL.__version__`, relying on the import system's side effect of
# binding the submodule as a package attribute after the `from .pySmartDL
# import ...` line above — fragile and flagged by linters (F821). Importing
# the name directly keeps the same public value without the implicit binding.
from .pySmartDL import __version__
@ -0,0 +1,119 @@ |
|||
import threading |
|||
import time |
|||
|
|||
from . import utils |
|||
|
|||
class ControlThread(threading.Thread):
    """A daemon thread that shows information about a running SmartDL object.

    Every 100ms it samples the shared downloaded-bytes counter to compute
    the current speed and a smoothed ETA, optionally renders a console
    progress bar, and — once the worker pool and the post-processing thread
    are done — shuts the pool down and marks the SmartDL object finished.
    """
    def __init__(self, obj):
        # `obj` is the owning SmartDL instance; this thread reads its pool,
        # filesize, error list and shared byte counter.
        threading.Thread.__init__(self)
        self.obj = obj
        self.progress_bar = obj.progress_bar
        self.logger = obj.logger
        self.shared_var = obj.shared_var

        self.dl_speed = 0
        self.eta = 0
        self.lastBytesSamples = []  # list with last 50 Bytes Samples.
        self.last_calculated_totalBytes = 0
        self.calcETA_queue = []
        self.calcETA_i = 0
        self.calcETA_val = 0
        self.dl_time = -1.0  # total wall-clock download time; -1.0 until finished

        self.daemon = True
        # The thread starts itself upon construction.
        self.start()

    def run(self):
        t1 = time.time()
        self.logger.info("Control thread has been started.")

        # Poll every 100ms while the worker threads are still downloading.
        while not self.obj.pool.done():
            self.dl_speed = self.calcDownloadSpeed(self.shared_var.value)
            if self.dl_speed > 0:
                self.eta = self.calcETA((self.obj.filesize-self.shared_var.value)/self.dl_speed)

            if self.progress_bar:
                if self.obj.filesize:
                    status = r"[*] %s / %s @ %s/s %s [%3.1f%%, %s left] " % (utils.sizeof_human(self.shared_var.value), utils.sizeof_human(self.obj.filesize), utils.sizeof_human(self.dl_speed), utils.progress_bar(1.0*self.shared_var.value/self.obj.filesize), self.shared_var.value * 100.0 / self.obj.filesize, utils.time_human(self.eta, fmt_short=True))
                else:
                    # Total size unknown (no Content-Length): no percentage/ETA.
                    status = r"[*] %s / ??? MB @ %s/s " % (utils.sizeof_human(self.shared_var.value), utils.sizeof_human(self.dl_speed))
                # chr(8) is backspace: rewind the cursor so the next update
                # overwrites this status line in place.
                status = status + chr(8)*(len(status)+1)
                print(status, end=' ', flush=True)
            time.sleep(0.1)

        if self.obj._killed:
            self.logger.info("File download process has been stopped.")
            return

        # Render the final 100% line.
        if self.progress_bar:
            if self.obj.filesize:
                print(r"[*] %s / %s @ %s/s %s [100%%, 0s left] " % (utils.sizeof_human(self.obj.filesize), utils.sizeof_human(self.obj.filesize), utils.sizeof_human(self.dl_speed), utils.progress_bar(1.0)))
            else:
                print(r"[*] %s / %s @ %s/s " % (utils.sizeof_human(self.shared_var.value), utils.sizeof_human(self.shared_var.value), utils.sizeof_human(self.dl_speed)))

        t2 = time.time()
        self.dl_time = float(t2-t1)

        # Wait for post-download actions (chunk merging, hash checks) to end.
        while self.obj.post_threadpool_thread.is_alive():
            time.sleep(0.1)

        self.obj.pool.shutdown()
        self.obj.status = "finished"
        if not self.obj.errors:
            self.logger.info("File downloaded within %.2f seconds." % self.dl_time)

    def get_eta(self):
        # ETA is meaningless while paused or before enough samples exist.
        if self.eta <= 0 or self.obj.status == 'paused':
            return 0
        return self.eta
    def get_speed(self):
        if self.obj.status == 'paused':
            return 0
        return self.dl_speed
    def get_dl_size(self):
        # Clamp to the expected filesize in case the counter overshoots.
        if self.shared_var.value > self.obj.filesize:
            return self.obj.filesize
        return self.shared_var.value
    def get_final_filesize(self):
        return self.obj.filesize
    def get_progress(self):
        # Fraction in [0, 1]; 0 when the total size is unknown.
        if not self.obj.filesize:
            return 0
        return 1.0*self.shared_var.value/self.obj.filesize
    def get_dl_time(self):
        return self.dl_time

    def calcDownloadSpeed(self, totalBytes, sampleCount=30, sampleDuration=0.1):
        '''
        Function calculates the download rate.
        @param totalBytes: The total amount of bytes.
        @param sampleCount: How many samples the function should take into consideration.
        @param sampleDuration: Duration of a sample in seconds.
        '''
        l = self.lastBytesSamples
        newBytes = totalBytes - self.last_calculated_totalBytes
        self.last_calculated_totalBytes = totalBytes
        if newBytes >= 0: # newBytes may be negative, which will happen
            # if a thread has crashed and the totalBytes counter got decreased.
            if len(l) == sampleCount: # calc download for last 3 seconds (30 * 100ms per signal emit)
                l.pop(0)

            l.append(newBytes)

        # NOTE(review): if the very first sample is negative, `l` stays empty
        # and this divides by zero — confirm callers guarantee a non-negative
        # first sample.
        dlRate = sum(l)/len(l)/sampleDuration
        return dlRate

    def calcETA(self, eta):
        # Smooth the instantaneous ETA by averaging recent samples; the
        # published value is only refreshed every 10th call.
        self.calcETA_i += 1
        l = self.calcETA_queue
        l.append(eta)

        if self.calcETA_i % 10 == 0:
            self.calcETA_val = sum(l)/len(l)
        if len(l) == 30:
            l.pop(0)

        if self.calcETA_i < 50:
            # Not enough data yet (~the first 5 seconds): report "unknown".
            return 0
        return self.calcETA_val
@ -0,0 +1,88 @@ |
|||
import os |
|||
import urllib.request, urllib.error, urllib.parse |
|||
import time |
|||
from . import utils |
|||
|
|||
def download(url, dest, requestArgs=None, context=None, startByte=0, endByte=None, timeout=4, shared_var=None, thread_shared_cmds=None, logger=None, retries=3):
    '''
    The basic download function that runs at each thread.

    Downloads `url` (or the byte range [startByte, endByte] of it) into the
    file `dest`, crediting every chunk written to `shared_var` and obeying
    the 'stop'/'pause'/'limit' commands published in `thread_shared_cmds`.

    :param url: Url to download from.
    :param dest: Destination file path for this chunk.
    :param requestArgs: Keyword arguments forwarded to `urllib.request.Request`.
    :param context: Optional `ssl.SSLContext`; used to skip ssl validation if verify is False.
    :param startByte: First byte of the requested range.
    :param endByte: Last byte of the requested range; if None, the whole file is requested.
    :param timeout: Socket timeout, in seconds.
    :param shared_var: Shared counter of bytes downloaded across all threads (may be None).
    :param thread_shared_cmds: Dict of commands shared between download threads (may be None).
    :param logger: Optional logger; a no-op DummyLogger is used when omitted.
    :param retries: How many times to retry on HTTP 416 before giving up.
    '''
    logger = logger or utils.DummyLogger()
    # Guard: the documented default of None would crash on ** expansion.
    requestArgs = requestArgs or {}
    req = urllib.request.Request(url, **requestArgs)
    if endByte:
        req.add_header('Range', 'bytes={:.0f}-{:.0f}'.format(startByte, endByte))
    logger.info("Downloading '{}' to '{}'...".format(url, dest))
    try:
        # Context is used to skip ssl validation if verify is False.
        urlObj = urllib.request.urlopen(req, timeout=timeout, context=context)
    except urllib.error.HTTPError as e:
        if e.code == 416:
            '''
            HTTP 416 Error: Requested Range Not Satisfiable. Happens when we ask
            for a range that is not available on the server. It will happen when
            the server will try to send us a .html page that means something like
            "you opened too many connections to our server". If this happens, we
            will wait for the other threads to finish their connections and try again.
            '''
            if retries > 0:
                logger.warning("Thread didn't got the file it was expecting. Retrying ({} times left)...".format(retries-1))
                time.sleep(5)
                # Bug fix: the original recursive call omitted `context`,
                # shifting startByte/endByte/... one positional slot to the
                # left. Pass everything by keyword so each argument lands in
                # the right parameter.
                return download(
                    url, dest,
                    requestArgs=requestArgs,
                    context=context,
                    startByte=startByte,
                    endByte=endByte,
                    timeout=timeout,
                    shared_var=shared_var,
                    thread_shared_cmds=thread_shared_cmds,
                    logger=logger,
                    retries=retries-1,
                )
            else:
                raise
        else:
            raise

    try:
        with open(dest, 'wb') as f:
            if endByte:
                filesize = endByte-startByte
            else:
                try:
                    filesize = int(urlObj.headers["Content-Length"])
                    logger.info("Content-Length is {}.".format(filesize))
                except (IndexError, KeyError, TypeError):
                    logger.warning("Server did not send Content-Length. Filesize is unknown.")

            filesize_dl = 0  # total downloaded size
            limitspeed_timestamp = time.time()
            limitspeed_filesize = 0
            block_sz = 8192
            while True:
                if thread_shared_cmds:
                    if 'stop' in thread_shared_cmds:
                        logger.info('stop command received. Stopping.')
                        # Imported lazily to avoid a circular import
                        # (pySmartDL.py imports this module). The original
                        # raised the undefined name `CanceledException`,
                        # which produced a NameError instead.
                        from .pySmartDL import CanceledException
                        raise CanceledException()
                    if 'pause' in thread_shared_cmds:
                        time.sleep(0.2)
                        continue
                    if 'limit' in thread_shared_cmds:
                        now = time.time()
                        time_passed = now - limitspeed_timestamp
                        if time_passed > 0.1:  # we only observe the limit after 100ms
                            # if we passed the limit, we should slow down
                            if (filesize_dl-limitspeed_filesize)/time_passed >= thread_shared_cmds['limit']:
                                time_to_sleep = (filesize_dl-limitspeed_filesize) / thread_shared_cmds['limit']
                                logger.debug('Thread has downloaded {} in {}. Limit is {}/s. Slowing down...'.format(utils.sizeof_human(filesize_dl-limitspeed_filesize), utils.time_human(time_passed, fmt_short=True, show_ms=True), utils.sizeof_human(thread_shared_cmds['limit'])))
                                time.sleep(time_to_sleep)
                                continue
                            else:
                                limitspeed_timestamp = now
                                limitspeed_filesize = filesize_dl

                try:
                    buff = urlObj.read(block_sz)
                except Exception as e:
                    logger.error(str(e))
                    # Roll back this thread's contribution so the shared
                    # progress counter stays accurate after a crash.
                    if shared_var:
                        shared_var.value -= filesize_dl
                    raise

                if not buff:
                    break

                filesize_dl += len(buff)
                if shared_var:
                    shared_var.value += len(buff)
                f.write(buff)
    finally:
        # Close the connection even when the loop exits with an exception
        # (the original leaked it on error paths).
        urlObj.close()
@ -0,0 +1,675 @@ |
|||
import os |
|||
import sys |
|||
import urllib.request, urllib.error, urllib.parse |
|||
import copy |
|||
import threading |
|||
import time |
|||
import math |
|||
import tempfile |
|||
import base64 |
|||
import hashlib |
|||
import socket |
|||
import logging |
|||
from io import StringIO |
|||
import multiprocessing.dummy as multiprocessing |
|||
from ctypes import c_int |
|||
import json |
|||
import ssl |
|||
|
|||
from . import utils |
|||
from .control_thread import ControlThread |
|||
from .download import download |
|||
|
|||
__all__ = ['SmartDL', 'utils'] |
|||
__version_mjaor__ = 1 |
|||
__version_minor__ = 3 |
|||
__version_micro__ = 4 |
|||
__version__ = "{}.{}.{}".format(__version_mjaor__, __version_minor__, __version_micro__) |
|||
|
|||
class HashFailedException(Exception):
    """Raised when the downloaded file's hash does not match the expected one."""

    def __init__(self, fn, calc_hash, needed_hash):
        # Keep the offending filename plus both hashes so callers can report
        # exactly what mismatched.
        self.filename = fn
        self.calculated_hash = calc_hash
        self.needed_hash = needed_hash

    def __str__(self):
        return 'HashFailedException(%s, got %s, expected %s)' % (
            self.filename, self.calculated_hash, self.needed_hash)

    def __repr__(self):
        return '<HashFailedException %s, got %s, expected %s>' % (
            self.filename, self.calculated_hash, self.needed_hash)
|||
|
|||
class CanceledException(Exception):
    """Raised when the download job is canceled."""

    def __init__(self):
        # Deliberately takes no arguments: cancellation carries no state.
        pass

    def __str__(self):
        return 'CanceledException'

    def __repr__(self):
        return "<CanceledException>"
|||
|
|||
class SmartDL: |
|||
''' |
|||
The main SmartDL class |
|||
|
|||
:param urls: Download url. It is possible to pass unsafe and unicode characters. You can also pass a list of urls, and those will be used as mirrors. |
|||
:type urls: string or list of strings |
|||
:param dest: Destination path. Default is `%TEMP%/pySmartDL/`. |
|||
:type dest: string |
|||
:param progress_bar: If True, prints a progress bar to the `stdout stream <http://docs.python.org/2/library/sys.html#sys.stdout>`_. Default is `True`. |
|||
:type progress_bar: bool |
|||
:param fix_urls: If true, attempts to fix urls with unsafe characters. |
|||
:type fix_urls: bool |
|||
:param threads: Number of threads to use. |
|||
:type threads: int |
|||
:param timeout: Timeout for network operations, in seconds. Default is 5. |
|||
:type timeout: int |
|||
:param logger: An optional logger. |
|||
:type logger: `logging.Logger` instance |
|||
:param connect_default_logger: If true, connects a default logger to the class. |
|||
:type connect_default_logger: bool |
|||
:param request_args: Arguments to be passed to a new urllib.request.Request instance in dictionary form. See `urllib.request docs <https://docs.python.org/3/library/urllib.request.html#urllib.request.Request>`_ for options. |
|||
:type request_args: dict |
|||
:rtype: `SmartDL` instance |
|||
:param verify: If ssl certificates should be validated. |
|||
:type verify: bool |
|||
|
|||
.. NOTE:: |
|||
The provided dest may be a folder or a full path name (including filename). The workflow is: |
|||
|
|||
* If the path exists, and it's an existing folder, the file will be downloaded to there with the original filename. |
|||
* If the path does not exist, it will create the folders, if needed, and refer to the last section of the path as the filename. |
|||
* If you want to download to folder that does not exist at the moment, and want the module to fill in the filename, make sure the path ends with `os.sep`. |
|||
* If no path is provided, `%TEMP%/pySmartDL/` will be used. |
|||
''' |
|||
|
|||
    def __init__(self, urls, dest=None, progress_bar=True, fix_urls=True, threads=5, timeout=5, logger=None, connect_default_logger=False, request_args=None, verify=True):
        # Pick a logger: an explicit one wins, otherwise optionally create
        # the package's debugging logger, otherwise a silent no-op logger.
        if logger:
            self.logger = logger
        elif connect_default_logger:
            self.logger = utils.create_debugging_logger()
        else:
            self.logger = utils.DummyLogger()
        if request_args:
            if "headers" not in request_args:
                request_args["headers"] = dict()
            self.requestArgs = request_args
        else:
            self.requestArgs = {"headers": dict()}
        # Always send a User-Agent; pick a random one unless the caller set it.
        if "User-Agent" not in self.requestArgs["headers"]:
            self.requestArgs["headers"]["User-Agent"] = utils.get_random_useragent()
        # The first url is used immediately; the rest serve as fallback mirrors.
        self.mirrors = [urls] if isinstance(urls, str) else urls
        if fix_urls:
            self.mirrors = [utils.url_fix(x) for x in self.mirrors]
        self.url = self.mirrors.pop(0)
        self.logger.info('Using url "{}"'.format(self.url))

        # Derive the filename from the url path; used when dest is a folder
        # (or missing) rather than a full file path.
        fn = urllib.parse.unquote(os.path.basename(urllib.parse.urlparse(self.url).path))
        self.dest = dest or os.path.join(tempfile.gettempdir(), 'pySmartDL', fn)
        if self.dest[-1] == os.sep:
            # dest ends with a separator: treat it as a folder. If a *file*
            # exists at the separator-stripped path, remove it first.
            if os.path.exists(self.dest[:-1]) and os.path.isfile(self.dest[:-1]):
                os.unlink(self.dest[:-1])
            self.dest += fn
        if os.path.isdir(self.dest):
            self.dest = os.path.join(self.dest, fn)

        self.progress_bar = progress_bar
        self.threads_count = threads
        self.timeout = timeout
        self.current_attemp = 1
        self.attemps_limit = 4
        self.minChunkFile = 1024**2*2 # 2MB
        self.filesize = 0
        self.shared_var = multiprocessing.Value(c_int, 0) # a ctypes var that counts the bytes already downloaded
        self.thread_shared_cmds = {}
        self.status = "ready"
        self.verify_hash = False
        self._killed = False
        self._failed = False
        self._start_func_blocking = True
        self.errors = []

        # Created later by start(); None until then.
        self.post_threadpool_thread = None
        self.control_thread = None

        if not os.path.exists(os.path.dirname(self.dest)):
            self.logger.info('Folder "{}" does not exist. Creating...'.format(os.path.dirname(self.dest)))
            os.makedirs(os.path.dirname(self.dest))
        # Multipart (multi-threaded) downloading requires HTTP Range support.
        if not utils.is_HTTPRange_supported(self.url, timeout=self.timeout):
            self.logger.warning("Server does not support HTTPRange. threads_count is set to 1.")
            self.threads_count = 1
        if os.path.exists(self.dest):
            self.logger.warning('Destination "{}" already exists. Existing file will be removed.'.format(self.dest))
        if not os.path.exists(os.path.dirname(self.dest)):
            self.logger.warning('Directory "{}" does not exist. Creating it...'.format(os.path.dirname(self.dest)))
            os.makedirs(os.path.dirname(self.dest))

        self.logger.info("Creating a ThreadPool of {} thread(s).".format(self.threads_count))
        self.pool = utils.ManagedThreadPoolExecutor(self.threads_count)

        # verify=False: build an SSL context that skips certificate checks.
        if verify:
            self.context = None
        else:
            self.context = ssl.create_default_context()
            self.context.check_hostname = False
            self.context.verify_mode = ssl.CERT_NONE
|||
|
|||
def __str__(self): |
|||
return 'SmartDL(r"{}", dest=r"{}")'.format(self.url, self.dest) |
|||
|
|||
def __repr__(self): |
|||
return "<SmartDL {}>".format(self.url) |
|||
|
|||
def add_basic_authentication(self, username, password): |
|||
''' |
|||
Uses HTTP Basic Access authentication for the connection. |
|||
|
|||
:param username: Username. |
|||
:type username: string |
|||
:param password: Password. |
|||
:type password: string |
|||
''' |
|||
auth_string = '{}:{}'.format(username, password) |
|||
base64string = base64.standard_b64encode(auth_string.encode('utf-8')) |
|||
self.requestArgs['headers']['Authorization'] = b"Basic " + base64string |
|||
|
|||
def add_hash_verification(self, algorithm, hash): |
|||
''' |
|||
Adds hash verification to the download. |
|||
|
|||
If hash is not correct, will try different mirrors. If all mirrors aren't |
|||
passing hash verification, `HashFailedException` Exception will be raised. |
|||
|
|||
.. NOTE:: |
|||
If downloaded file already exist on the destination, and hash matches, pySmartDL will not download it again. |
|||
|
|||
.. WARNING:: |
|||
The hashing algorithm must be supported on your system, as documented at `hashlib documentation page <http://docs.python.org/3/library/hashlib.html>`_. |
|||
|
|||
:param algorithm: Hashing algorithm. |
|||
:type algorithm: string |
|||
:param hash: Hash code. |
|||
:type hash: string |
|||
''' |
|||
|
|||
self.verify_hash = True |
|||
self.hash_algorithm = algorithm |
|||
self.hash_code = hash |
|||
|
|||
def fetch_hash_sums(self): |
|||
''' |
|||
Will attempt to fetch UNIX hash sums files (`SHA256SUMS`, `SHA1SUMS` or `MD5SUMS` files in |
|||
the same url directory). |
|||
|
|||
Calls `self.add_hash_verification` if successful. Returns if a matching hash was found. |
|||
|
|||
:rtype: bool |
|||
|
|||
*New in 1.2.1* |
|||
''' |
|||
default_sums_filenames = ['SHA256SUMS', 'SHA1SUMS', 'MD5SUMS'] |
|||
folder = os.path.dirname(self.url) |
|||
orig_basename = os.path.basename(self.url) |
|||
|
|||
self.logger.info("Looking for SUMS files...") |
|||
for filename in default_sums_filenames: |
|||
try: |
|||
sums_url = "%s/%s" % (folder, filename) |
|||
sumsRequest = urllib.request.Request(sums_url, **self.requestArgs) |
|||
obj = urllib.request.urlopen(sumsRequest) |
|||
data = obj.read().split('\n') |
|||
obj.close() |
|||
|
|||
for line in data: |
|||
if orig_basename.lower() in line.lower(): |
|||
self.logger.info("Found a matching hash in %s" % sums_url) |
|||
algo = filename.rstrip('SUMS') |
|||
hash = line.split(' ')[0] |
|||
self.add_hash_verification(algo, hash) |
|||
return |
|||
|
|||
except urllib.error.HTTPError: |
|||
continue |
|||
|
|||
    def start(self, blocking=None):
        '''
        Starts the download task. Will raise `RuntimeError` if the object is already downloading.

        .. warning::
            If you're using the non-blocking mode, Exceptions won't be raised. In that case, call
            `isSuccessful()` after the task is finished, to make sure the download succeeded. Call
            `get_errors()` to get the exceptions.

        :param blocking: If true, calling this function will block the thread until the download finished. Default is *True*.
        :type blocking: bool
        '''
        if not self.status == "ready":
            raise RuntimeError("cannot start (current status is {})".format(self.status))
        self.logger.info('Starting a new SmartDL operation.')

        # Remember the blocking preference so that internal re-invocations
        # (mirror fallback, retry) reuse the caller's original choice.
        if blocking is None:
            blocking = self._start_func_blocking
        else:
            self._start_func_blocking = blocking

        if self.mirrors:
            self.logger.info('One URL and {} mirrors are loaded.'.format(len(self.mirrors)))
        else:
            self.logger.info('One URL is loaded.')

        # If hash verification is enabled and the destination already matches,
        # skip the download entirely.
        if self.verify_hash and os.path.exists(self.dest):
            if utils.get_file_hash(self.hash_algorithm, self.dest) == self.hash_code:
                self.logger.info("Destination '%s' already exists, and the hash matches. No need to download." % self.dest)
                self.status = 'finished'
                return

        self.logger.info("Downloading '{}' to '{}'...".format(self.url, self.dest))
        req = urllib.request.Request(self.url, **self.requestArgs)
        try:
            urlObj = urllib.request.urlopen(req, timeout=self.timeout, context=self.context)
        except (urllib.error.HTTPError, urllib.error.URLError, socket.timeout) as e:
            self.errors.append(e)
            if self.mirrors:
                # Current url failed: fall back to the next mirror and restart.
                self.logger.info("{} Trying next mirror...".format(str(e)))
                self.url = self.mirrors.pop(0)
                self.logger.info('Using url "{}"'.format(self.url))
                self.start(blocking)
                return
            else:
                self.logger.warning(str(e))
                self.errors.append(e)
                self._failed = True
                self.status = "finished"
                raise

        try:
            self.filesize = int(urlObj.headers["Content-Length"])
            self.logger.info("Content-Length is {} ({}).".format(self.filesize, utils.sizeof_human(self.filesize)))
        except (IndexError, KeyError, TypeError):
            self.logger.warning("Server did not send Content-Length. Filesize is unknown.")
            self.filesize = 0

        # Split the byte range between worker threads; each entry in `args`
        # is a (startByte, endByte) pair for one thread.
        args = utils.calc_chunk_size(self.filesize, self.threads_count, self.minChunkFile)
        bytes_per_thread = args[0][1] - args[0][0] + 1
        if len(args)>1:
            self.logger.info("Launching {} threads (downloads {}/thread).".format(len(args), utils.sizeof_human(bytes_per_thread)))
        else:
            self.logger.info("Launching 1 thread (downloads {}).".format(utils.sizeof_human(bytes_per_thread)))

        self.status = "downloading"

        # Each worker downloads its chunk to "<dest>.000", "<dest>.001", ...
        for i, arg in enumerate(args):
            req = self.pool.submit(
                download,
                self.url,
                self.dest+".%.3d" % i,
                self.requestArgs,
                self.context,
                arg[0],
                arg[1],
                self.timeout,
                self.shared_var,
                self.thread_shared_cmds,
                self.logger
            )

        # Merges the chunk files into `dest` after the pool finishes.
        self.post_threadpool_thread = threading.Thread(
            target=post_threadpool_actions,
            args=(
                self.pool,
                [[(self.dest+".%.3d" % i) for i in range(len(args))], self.dest],
                self.filesize,
                self
            )
        )
        self.post_threadpool_thread.daemon = True
        self.post_threadpool_thread.start()

        # Monitors progress, speed and completion in the background.
        self.control_thread = ControlThread(self)

        if blocking:
            self.wait(raise_exceptions=True)
|||
|
|||
def _exc_callback(self, req, e): |
|||
self.errors.append(e[0]) |
|||
self.logger.exception(e[1]) |
|||
|
|||
    def retry(self, eStr=""):
        # Restart the whole download from scratch, up to `attemps_limit`
        # attempts in total. `eStr` describes the error that triggered the
        # retry and is only used in the give-up message.
        if self.current_attemp < self.attemps_limit:
            self.current_attemp += 1
            # Reset task state so start() accepts the re-run.
            self.status = "ready"
            self.shared_var.value = 0
            self.thread_shared_cmds = {}
            self.start()

        else:
            s = 'The maximum retry attempts reached'
            if eStr:
                s += " ({})".format(eStr)
            # Record the give-up as a synthetic HTTPError and mark the task failed.
            self.errors.append(urllib.error.HTTPError(self.url, "0", s, {}, StringIO()))
            self._failed = True
|||
|
|||
def try_next_mirror(self, e=None): |
|||
if self.mirrors: |
|||
if e: |
|||
self.errors.append(e) |
|||
self.status = "ready" |
|||
self.shared_var.value = 0 |
|||
self.url = self.mirrors.pop(0) |
|||
self.logger.info('Using url "{}"'.format(self.url)) |
|||
self.start() |
|||
else: |
|||
self._failed = True |
|||
self.errors.append(e) |
|||
|
|||
def get_eta(self, human=False): |
|||
''' |
|||
Get estimated time of download completion, in seconds. Returns `0` if there is |
|||
no enough data to calculate the estimated time (this will happen on the approx. |
|||
first 5 seconds of each download). |
|||
|
|||
:param human: If true, returns a human-readable formatted string. Else, returns an int type number |
|||
:type human: bool |
|||
:rtype: int/string |
|||
''' |
|||
if human: |
|||
s = utils.time_human(self.control_thread.get_eta()) |
|||
return s if s else "TBD" |
|||
return self.control_thread.get_eta() |
|||
|
|||
def get_speed(self, human=False): |
|||
''' |
|||
Get current transfer speed in bytes per second. |
|||
|
|||
:param human: If true, returns a human-readable formatted string. Else, returns an int type number |
|||
:type human: bool |
|||
:rtype: int/string |
|||
''' |
|||
if human: |
|||
return "{}/s".format(utils.sizeof_human(self.control_thread.get_speed())) |
|||
return self.control_thread.get_speed() |
|||
|
|||
def get_progress(self): |
|||
''' |
|||
Returns the current progress of the download, as a float between `0` and `1`. |
|||
|
|||
:rtype: float |
|||
''' |
|||
if not self.filesize: |
|||
return 0 |
|||
if self.control_thread.get_dl_size() <= self.filesize: |
|||
return 1.0*self.control_thread.get_dl_size()/self.filesize |
|||
return 1.0 |
|||
|
|||
def get_progress_bar(self, length=20): |
|||
''' |
|||
Returns the current progress of the download as a string containing a progress bar. |
|||
|
|||
.. NOTE:: |
|||
That's an alias for pySmartDL.utils.progress_bar(obj.get_progress()). |
|||
|
|||
:param length: The length of the progress bar in chars. Default is 20. |
|||
:type length: int |
|||
:rtype: string |
|||
''' |
|||
return utils.progress_bar(self.get_progress(), length) |
|||
|
|||
def isFinished(self): |
|||
''' |
|||
Returns if the task is finished. |
|||
|
|||
:rtype: bool |
|||
''' |
|||
if self.status == "ready": |
|||
return False |
|||
if self.status == "finished": |
|||
return True |
|||
return not self.post_threadpool_thread.is_alive() |
|||
|
|||
def isSuccessful(self): |
|||
''' |
|||
Returns if the download is successfull. It may fail in the following scenarios: |
|||
|
|||
- Hash check is enabled and fails. |
|||
- All mirrors are down. |
|||
- Any local I/O problems (such as `no disk space available`). |
|||
|
|||
.. NOTE:: |
|||
Call `get_errors()` to get the exceptions, if any. |
|||
|
|||
Will raise `RuntimeError` if it's called when the download task is not finished yet. |
|||
|
|||
:rtype: bool |
|||
''' |
|||
|
|||
if self._killed: |
|||
return False |
|||
|
|||
n = 0 |
|||
while self.status != 'finished': |
|||
n += 1 |
|||
time.sleep(0.1) |
|||
if n >= 15: |
|||
|