Skip to content

urequests: Add iteration methods to process large responses and support 'with' to close response #278

Open
chrisb2 wants to merge 1 commit into micropython:master from
chrisb2:master
Open

urequests: Add iteration methods to process large responses and support 'with' to close response #278
chrisb2 wants to merge 1 commit into micropython:master from
chrisb2:master

Conversation

@chrisb2
Copy link

@chrisb2 chrisb2 commented May 7, 2018

This change adds the following methods from the Response class of the Python requests module to the urequests Response class:

  • iter_lines()
  • iter_content()
  • __enter__()
  • __exit__()
  • __iter__()

This enables two things:

  • processing of large HTTP responses
  • succinct handling of the need to close Responses.

So for example:

# Iterate over individual lines of the response
with urequests.get('http://jsonplaceholder.typicode.com/users') as response:
    for line in response.iter_lines():
        print(line.decode(response.encoding))
		
# Iterate over 'chunks' of the response
with urequests.get('http://jsonplaceholder.typicode.com/users') as response:
    for chunk in response.iter_content():
        print(chunk.decode(response.encoding))

The __iter__() method of Response allows this example:

for chunk in urequests.get('http://jsonplaceholder.typicode.com/users'):
    print(chunk.decode('UTF-8'))

I chose to reduce the default ITER_CHUNK_SIZE from 512 to 128 to conserve resources; I'm not sure whether this makes sense.

I have tested this new functionality on ESP8266 and ESP32.

Finally, this obviously increases code size, so maybe the scope can be reduced. The __iter__() method could be dropped, as this would not reduce the actual capability. Beyond that, we could maybe drop the iter_lines() method. Both __enter__() and __exit__() are small and get rid of the nasty try/finally syntax, so I think these should stay.

@rdagger
Copy link

rdagger commented Apr 11, 2020

This feature looks very helpful. I'm having memory errors with urequests on an ESP32. Are there plans to merge?

@ehagerty
Copy link

@chrisb2 just a thank you for the code here, it not only works but it does sort of blow one's mind that in 2021 micropython doesn't bake this in.

@NoumanSaleem
Copy link

Looks great! @chrisb2 would you consider publishing this under a different name? There are at least three variations of the urequests package living in pip already (https://pypi.org/search/?q=urequests)
Possibly: chrisb2-urequests

Copy link

@davidpcahill davidpcahill left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please merge / add to latest!

@temphstarmans
Copy link

temphstarmans commented May 8, 2024

The commit no longer works. I currently use something like the code below, which includes more recent changes such as the SSL change and the redirect handling.
Furthermore, don't forget to set a User-Agent in the headers, e.g. {"User-Agent": "randomstringhere"}; otherwise you might face issues with the GitHub REST API.

import usocket

# Default number of bytes read from the underlying stream per iteration step.
# Kept small to conserve RAM on constrained boards.
ITER_CHUNK_SIZE = 128


class Response:
    """HTTP response wrapping the stream the body is read from.

    The body can be read in one go (``content`` / ``text`` / ``json()``) or
    incrementally (``iter_content()`` / ``iter_lines()`` / plain iteration).
    The object is a context manager: leaving the ``with`` block closes the
    underlying stream.
    """

    def __init__(self, f):
        # f: file-like object (socket) positioned at the start of the body.
        self.raw = f
        self.encoding = "utf-8"
        # True once the body has been fully read, either through the
        # iter_content() generator or through the content property.
        self._content_consumed = False
        # Whole body as read by the content property, or None if not read.
        self._cached = None

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def __iter__(self):
        """Iterate over the body in chunks of ``ITER_CHUNK_SIZE`` bytes."""
        return self.iter_content()

    def close(self):
        """Close the underlying stream (if still open) and drop the cache."""
        if self.raw:
            self.raw.close()
            self.raw = None
        self._cached = None

    @property
    def content(self):
        """Return the whole body as bytes, reading and caching it on first use.

        The underlying stream is closed once the body has been read.

        Raises:
            RuntimeError: the response was closed before the body was read.
        """
        if self._cached is None:
            if self.raw is None:
                # Fail with a clear message instead of an AttributeError on
                # None when the stream is already gone.
                raise RuntimeError("response already closed")
            try:
                self._cached = self.raw.read()
            finally:
                self.raw.close()
                self.raw = None
                # Nothing is left to stream after a full read.
                self._content_consumed = True
        return self._cached

    @property
    def text(self):
        """Return the body decoded with ``self.encoding``."""
        return str(self.content, self.encoding)

    def json(self):
        """Return the body parsed as JSON."""
        import ujson

        return ujson.loads(self.content)

    def iter_content(self, chunk_size=ITER_CHUNK_SIZE):
        """Return a generator yielding the body in pieces.

        Args:
            chunk_size: maximum number of bytes requested per read; ``None``
                is passed straight through to the stream's ``read``.

        Raises:
            RuntimeError: the body has already been fully consumed.
            TypeError: ``chunk_size`` is neither ``None`` nor an int.
        """

        def generate():
            while True:
                chunk = self.raw.read(chunk_size)
                if not chunk:
                    break
                yield chunk
            self._content_consumed = True

        # Validation runs eagerly, before the generator is handed out.
        if self._content_consumed:
            raise RuntimeError("response already consumed")
        elif chunk_size is not None and not isinstance(chunk_size, int):
            raise TypeError(
                "chunk_size must be an int, it is instead a %s." % type(chunk_size)
            )

        return generate()

    def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, delimiter=None):
        """Yield the body one line at a time, without the delimiter.

        Args:
            chunk_size: number of bytes read from the stream per step.
            delimiter: bytes separating lines; defaults to ``b"\\n"``.

        A trailing delimiter at the end of the body does not produce an
        extra empty line; empty lines inside the body are preserved.
        """
        sep = delimiter or b"\n"
        # Bytes after the last delimiter seen so far. They may be an
        # incomplete line (or the first half of a multi-byte delimiter), so
        # they are always carried over to the next chunk rather than yielded.
        pending = b""

        for chunk in self.iter_content(chunk_size=chunk_size):
            lines = (pending + chunk).split(sep)
            pending = lines.pop()
            for line in lines:
                yield line

        if pending:
            yield pending


def request(
    method,
    url,
    data=None,
    json=None,
    headers={},
    stream=None,
    timeout=None,
    parse_headers=True,
):
    """Send an HTTP/1.0 request and return a ``Response`` for the reply.

    Args:
        method: HTTP verb, e.g. ``"GET"``.
        url: ``http://`` or ``https://`` URL; an explicit ``host:port`` is
            honoured.
        data: request body (bytes or str). Mutually exclusive with ``json``.
        json: object serialised with ``ujson`` and sent as the body with a
            ``Content-Type: application/json`` header.
        headers: extra request headers. The default dict is shared between
            calls but is never mutated here.
        stream: not used by this function; only forwarded on redirects.
        timeout: socket timeout passed to ``settimeout`` when not ``None``.
        parse_headers: ``True`` to collect response headers into the dict
            exposed as ``Response.headers``; ``False`` to skip header
            parsing; any other value is called as
            ``parse_headers(line, headers_dict)`` for every header line
            (same convention as the upstream micropython-lib module).

    Returns:
        A ``Response`` with ``status_code``, ``reason`` and, unless
        ``parse_headers`` is ``False``, ``headers`` set.

    Raises:
        ValueError: unsupported URL scheme or chunked transfer encoding.
        NotImplementedError: ``Location`` header on an unsupported status.
        OSError: network failure.

    Note: HTTPS connections are made with certificate verification disabled
    (``CERT_NONE``).
    """
    redirect = None

    try:
        proto, dummy, host, path = url.split("/", 3)
    except ValueError:
        # No path component after the host.
        proto, dummy, host = url.split("/", 2)
        path = ""
    if proto == "http:":
        port = 80
    elif proto == "https:":
        import tls

        port = 443
    else:
        raise ValueError("Unsupported protocol: " + proto)

    if ":" in host:
        host, port = host.split(":", 1)
        port = int(port)

    ai = usocket.getaddrinfo(host, port, 0, usocket.SOCK_STREAM)
    ai = ai[0]

    resp_d = None
    if parse_headers is not False:
        resp_d = {}

    s = usocket.socket(ai[0], ai[1], ai[2])

    try:
        if timeout is not None:
            # Note: settimeout is not supported on all platforms, will raise
            # an AttributeError if not available.
            s.settimeout(timeout)

        s.connect(ai[-1])
        if proto == "https:":
            context = tls.SSLContext(tls.PROTOCOL_TLS_CLIENT)
            context.verify_mode = tls.CERT_NONE
            s = context.wrap_socket(s, server_hostname=host)
        s.write(b"%s /%s HTTP/1.0\r\n" % (method, path))
        if "Host" not in headers:
            s.write(b"Host: %s\r\n" % host)
        # Iterate over keys to avoid tuple alloc
        for k in headers:
            s.write(k)
            s.write(b": ")
            s.write(headers[k])
            s.write(b"\r\n")

        # Keep the caller's data/json untouched so that a 307/308 redirect
        # can replay them without tripping the assertion below.
        body = data
        if json is not None:
            assert data is None
            import ujson

            body = ujson.dumps(json)
            s.write(b"Content-Type: application/json\r\n")
        if isinstance(body, str):
            # Content-Length counts bytes, not characters.
            body = bytes(body, "utf-8")
        if body:
            s.write(b"Content-Length: %d\r\n" % len(body))
        s.write(b"\r\n")
        if body:
            s.write(body)

        # Status line: b"HTTP/1.x <code> <reason>\r\n"
        l = s.readline()
        l = l.split(None, 2)
        status = int(l[1])
        reason = ""
        if len(l) > 2:
            reason = l[2].rstrip()

        # Response headers, up to the blank line that precedes the body.
        while True:
            l = s.readline()
            if not l or l == b"\r\n":
                break
            if l.startswith(b"Transfer-Encoding:"):
                if b"chunked" in l:
                    raise ValueError("Unsupported " + str(l, "utf-8"))
            elif l.startswith(b"Location:") and not 200 <= status <= 299:
                if status in [301, 302, 303, 307, 308]:
                    # Strip the b"Location: " prefix and trailing b"\r\n".
                    redirect = str(l[10:-2], "utf-8")
                else:
                    raise NotImplementedError("Redirect %d not yet supported" % status)

            if parse_headers is True:
                parts = str(l, "utf-8").split(":", 1)
                if len(parts) == 2:
                    resp_d[parts[0]] = parts[1].strip()
            elif parse_headers is not False:
                parse_headers(l, resp_d)
    except Exception:
        # Close the socket on any failure, not only on network errors, so
        # that protocol/parse errors do not leak it.
        s.close()
        raise

    if redirect:
        s.close()
        if status in [301, 302, 303]:
            # These statuses are followed with a body-less GET.
            return request(
                "GET", redirect, None, None, headers, stream, timeout, parse_headers
            )
        else:
            # 307/308 replay the original method and body.
            return request(
                method, redirect, data, json, headers, stream, timeout, parse_headers
            )
    else:
        resp = Response(s)
        resp.status_code = status
        resp.reason = reason
        if resp_d is not None:
            resp.headers = resp_d
        return resp


def head(url, **kwargs):
    """Send a HEAD request to ``url``; keyword arguments go to ``request``."""
    return request("HEAD", url, **kwargs)


def get(url, **kwargs):
    """Send a GET request to ``url``; keyword arguments go to ``request``."""
    return request("GET", url, **kwargs)


def post(url, **kwargs):
    """Send a POST request to ``url``; keyword arguments go to ``request``."""
    return request("POST", url, **kwargs)


def put(url, **kwargs):
    """Send a PUT request to ``url``; keyword arguments go to ``request``."""
    return request("PUT", url, **kwargs)


def patch(url, **kwargs):
    """Send a PATCH request to ``url``; keyword arguments go to ``request``."""
    return request("PATCH", url, **kwargs)


def delete(url, **kwargs):
    """Send a DELETE request to ``url``; keyword arguments go to ``request``."""
    return request("DELETE", url, **kwargs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

6 participants