import os
import urllib.request, urllib.error, urllib.parse
import time

from . import utils


def download(url, dest, requestArgs=None, context=None, startByte=0, endByte=None, timeout=4, shared_var=None, thread_shared_cmds=None, logger=None, retries=3):
    "The basic download function that runs at each thread."
    logger = logger or utils.DummyLogger()
    req = urllib.request.Request(url, **(requestArgs or {}))
    if endByte:
        req.add_header('Range', 'bytes={:.0f}-{:.0f}'.format(startByte, endByte))
    logger.info("Downloading '{}' to '{}'...".format(url, dest))
    try:
        # Context is used to skip ssl validation if verify is False.
        urlObj = urllib.request.urlopen(req, timeout=timeout, context=context)
    except urllib.error.HTTPError as e:
        if e.code == 416:
            '''
            HTTP 416 Error: Requested Range Not Satisfiable. It happens when we
            ask for a range that is not available on the server, typically when
            the server tries to send us an .html page that means something like
            "you opened too many connections to our server". If this happens, we
            wait for the other threads to finish their connections and try again.
            '''
            if retries > 0:
                logger.warning("Thread didn't get the file it was expecting. Retrying ({} times left)...".format(retries-1))
                time.sleep(5)
                return download(url, dest, requestArgs, context, startByte, endByte, timeout, shared_var, thread_shared_cmds, logger, retries-1)
            else:
                raise
        else:
            raise
    with open(dest, 'wb') as f:
        if endByte:
            filesize = endByte - startByte
        else:
            try:
                filesize = int(urlObj.headers["Content-Length"])
                logger.info("Content-Length is {}.".format(filesize))
            except (IndexError, KeyError, TypeError):
                logger.warning("Server did not send Content-Length. Filesize is unknown.")

        filesize_dl = 0  # total downloaded size
        limitspeed_timestamp = time.time()
        limitspeed_filesize = 0
        block_sz = 8192
        while True:
            if thread_shared_cmds:
                if 'stop' in thread_shared_cmds:
                    logger.info('stop command received. Stopping.')
                    # CanceledException is assumed to be defined elsewhere in the package.
                    raise CanceledException()
                if 'pause' in thread_shared_cmds:
                    time.sleep(0.2)
                    continue
                if 'limit' in thread_shared_cmds:
                    now = time.time()
                    time_passed = now - limitspeed_timestamp
                    if time_passed > 0.1:  # we only observe the limit after 100ms
                        # if we passed the limit, sleep long enough to fall back under it
                        if (filesize_dl - limitspeed_filesize) / time_passed >= thread_shared_cmds['limit']:
                            time_to_sleep = (filesize_dl - limitspeed_filesize) / thread_shared_cmds['limit']
                            logger.debug('Thread has downloaded {} in {}. Limit is {}/s. Slowing down...'.format(
                                utils.sizeof_human(filesize_dl - limitspeed_filesize),
                                utils.time_human(time_passed, fmt_short=True, show_ms=True),
                                utils.sizeof_human(thread_shared_cmds['limit'])))
                            time.sleep(time_to_sleep)
                            continue
                        else:
                            limitspeed_timestamp = now
                            limitspeed_filesize = filesize_dl
            try:
                buff = urlObj.read(block_sz)
            except Exception as e:
                logger.error(str(e))
                if shared_var:
                    shared_var.value -= filesize_dl
                raise

            if not buff:
                break

            filesize_dl += len(buff)
            if shared_var:
                shared_var.value += len(buff)
            f.write(buff)

    urlObj.close()
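
# ---------------------------------------------------------------------------
# Illustrative usage (a sketch only, not part of the original module).
# It assumes download() has been imported from this module inside the package;
# the URL, destination path and byte range below are made-up examples.
# download() is normally invoked by the package's worker threads, one byte
# range per thread, with shared_var/thread_shared_cmds used for coordination.
#
#   import logging
#   logging.basicConfig(level=logging.DEBUG)
#   download('https://example.com/big.bin', '/tmp/big.bin.000',
#            requestArgs={'headers': {}},
#            startByte=0, endByte=1024*1024 - 1,   # first 1 MiB of the file
#            timeout=4, logger=logging.getLogger(__name__))
# ---------------------------------------------------------------------------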