I would encourage you to produce more explicit output, particularly with the filenames. If I wanted to reverse the process and scrape the code into files on my machine, using a Python script such as the following…

import json
import re

import requests
from lxml import html

# Convention: the post names each file in a bold paragraph immediately
# before its code block, e.g. <p><strong>Foo.java</strong></p>.
FILENAME_HINT_XPATH = "../preceding-sibling::p[1]/strong/text()"

def code_for_post(site, post):
    # Fetch the post body as HTML via the Stack Exchange API.
    r = requests.get('https://api.stackexchange.com/2.1/posts/{1}?site={0}&filter=withbody'.format(site, post))
    j = json.loads(r.text)
    body = j['items'][0]['body']
    tree = html.fromstring(body)

    # Keep only code blocks that carry a filename hint, keyed by that hint.
    code_elements = tree.xpath("//pre/code[%s]" % FILENAME_HINT_XPATH)
    return {c.xpath(FILENAME_HINT_XPATH)[0]: c.findtext(".") for c in code_elements}

def write_files(code):
    extension = '.java'     # <-- Yuck, due to @Simon.
    for filename_hint, content in code.items():
        filename = re.sub(r'[^A-Za-z0-9]', '', filename_hint) + extension
        with open(filename, 'w') as f:
            print(content, file=f)

write_files(code_for_post('codereview', 41198))

… then I would have to make assumptions about the filename extension.
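To make "more explicit output" concrete: the natural format to emit would be Markdown that puts each filename in a bold paragraph directly before its code block, since that is exactly the structure FILENAME_HINT_XPATH above looks for. A minimal sketch, using a hypothetical format_for_posting helper over the same {filename: content} dictionary (not part of the original program):

def format_for_posting(files):
    # 'files' maps filename -> source text, as in code_for_post() above.
    chunks = []
    for filename, content in sorted(files.items()):
        # Bold filename paragraph, then the code indented four spaces:
        # Markdown that renders as <p><strong>...</strong></p> + <pre><code>.
        indented = '\n'.join('    ' + line for line in content.splitlines())
        chunks.append('**{0}**\n\n{1}'.format(filename, indented))
    return '\n\n'.join(chunks)

Output in that shape would round-trip: the scraper above could recover both names and contents, and provided the bold hint carries the full filename, extension and all, the hard-coded .java hack in write_files disappears.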


The invocation method could be improved. Instead of hard-coding a particular directory to look in for the source files, I would suggest…

  • If files are explicitly passed to the program as command-line arguments, use those files.
  • If a directory is specified, then use all files contained therein, excluding files with significant non-ASCII content.
  • If no command-line arguments are used, then operate on the current directory.

It would be nice to be able to say java ReviewPreparer *.java | pbcopy.
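Here is a rough sketch of that fallback logic, written in Python for consistency with the script above; the looks_like_source helper and its 5% threshold are illustrative assumptions, standing in for the non-ASCII check described in the list:

import os
import sys

def looks_like_source(path, threshold=0.05):
    # Assumed heuristic for "significant non-ASCII content": sample the
    # first 4 KiB and reject files where too many bytes are non-ASCII.
    with open(path, 'rb') as f:
        data = f.read(4096)
    if not data:
        return False
    return sum(1 for b in data if b > 127) / len(data) <= threshold

def input_files(args):
    if not args:
        args = ['.']            # no arguments: operate on the current directory
    files = []
    for arg in args:
        if os.path.isdir(arg):  # a directory: take its files, filtered
            for name in sorted(os.listdir(arg)):
                path = os.path.join(arg, name)
                if os.path.isfile(path) and looks_like_source(path):
                    files.append(path)
        else:
            files.append(arg)   # explicitly named files are used as-is
    return files

print('\n'.join(input_files(sys.argv[1:])))

With that behaviour, the pbcopy one-liner falls out of the first case for free: the shell expands *.java into explicit file arguments.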
