Skip to main content
3 of 3
added 2 characters in body

Showcase difference multiprocessing and threading in Python

This week I dived into the issue of speeding up my program by either threading or multiprocessing. I did not understand well the difference between the two, but that became quite clear when I got the results. Below program is processing 12 text data files that has a Pandas DataFrame as output.

It compares the execution times for a simple sequential method; a multiprocessing pool method; a multiprocessing process method; and threading. My computer has 8 cpu's.

What is obvious is that threading in this case has no benefit over simple sequential processing (it took both just over 1000 s [17 minutes]), but multiprocessing pool and multiprocessing process does give an almost equivalent speed up of 3.4 times running the process in about 303 s [5 minutes].

''' program to run extended QC parsing module
    using various options: sequential, multiprocessing, threading
'''
import multiprocessing as mp
import threading
import queue
from pathlib import Path
from vp_extended_qc import ExtendedQc
from Utils.plogger import Logger, timed
from pprint import pprint

# Logging setup
logformat = '%(asctime)s:%(levelname)s:%(message)s'
Logger.set_logger(Path('./logs/vp_extended_qc.log'), logformat, 'INFO')
logger = Logger.getlogger()

def log_message(message):
    print(message)
    logger.info(message)

extended_qc_files = [
    Path('./data_files/211006 - check VAPS/20210920/210920_VIB01.txt'),
    Path('./data_files/211006 - check VAPS/20210920/210920_VIB02.txt'),
    Path('./data_files/211006 - check VAPS/20210920/210920_VIB03.txt'),
    Path('./data_files/211006 - check VAPS/20210920/210920_VIB04.txt'),
    Path('./data_files/211006 - check VAPS/20210920/210920_VIB05.txt'),
    Path('./data_files/211006 - check VAPS/20210920/210920_VIB07.txt'),
    Path('./data_files/211006 - check VAPS/20210920/210920_VIB08.txt'),
    Path('./data_files/211006 - check VAPS/20210920/210920_VIB09.txt'),
    Path('./data_files/211006 - check VAPS/20210920/210920_VIB10.txt'),
    Path('./data_files/211006 - check VAPS/20210920/210920_VIB11.txt'),
    Path('./data_files/211006 - check VAPS/20210920/210920_VIB12.txt'),
    Path('./data_files/211006 - check VAPS/20210920/210920_VIB13.txt'),
]


@timed(logger, print_log=True)
def extended_qc_pool(file_name):
    log_message(f'run extended qc for: {file_name}')
    ext_qc = ExtendedQc(file_name)
    ext_qc.read_extended_qc()
    return ext_qc.avg_peak_df


@timed(logger, print_log=True)
def extended_qc_thread_or_process(file_name, results_queue):
    log_message(f'run extended qc for: {file_name}')
    ext_qc = ExtendedQc(file_name)
    ext_qc.read_extended_qc()
    results_queue.put(ext_qc.avg_peak_df)


@timed(logger, print_log=True)
def run_sequential():
    results = []
    log_message('start sequential ...')
    results_queue = queue.Queue()
    for filename in extended_qc_files:
        results.append(extended_qc_pool(filename))

    pprint(results)


@timed(logger, print_log=True)
def run_pool():
    results = []
    log_message('start pool ...')
    cpus = mp.cpu_count()
    log_message(f'cpu\'s: {cpus}')
    with mp.Pool(cpus - 1) as pool:
        results.append(pool.map(extended_qc_pool, extended_qc_files))

    pprint(results)


@timed(logger, print_log=True)
def run_processes():
    log_message('start processes ...')
    processes = []
    results_queue = mp.Queue()
    results = []
    for filename in extended_qc_files:
        processes.append(
            mp.Process(
                target=extended_qc_thread_or_process,
                args=(filename, results_queue,),
            )
        )
        processes[-1].start()

    log_message('all processes have started ...')
    for _ in range(len(extended_qc_files)):
        results.append(results_queue.get())
        log_message(f'get results for vibrator: {results[-1].iloc[0]["vibrator"]}')

    log_message('all processes have completed ...')
    pprint(results)


@timed(logger, print_log=True)
def run_threading():
    results = []
    log_message('start threading ...')
    threads = []
    results_queue = queue.Queue()
    results = []
    for filename in extended_qc_files:
        threads.append(
            threading.Thread(
                target=extended_qc_thread_or_process,
                args=(filename, results_queue)
            )
        )
        threads[-1].start()

    log_message('all threats have started ...')
    _ = [t.join() for t in threads]

    log_message('all threats have completed ...')
    while not results_queue.empty():
        results.append(results_queue.get())

    pprint(results)


if __name__ == '__main__':
    log_message(f'=================== timed comparisons =================== ')
    run_sequential()
    run_pool()
    run_processes()
    run_threading()
    log_message(f'======================= completed ======================= ')

log results:

2021-10-18 17:10:00,607:INFO:=================== timed comparisons =================== 
2021-10-18 17:10:00,608:INFO:start sequential ...
2021-10-18 17:10:00,608:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB01.txt
2021-10-18 17:12:17,395:INFO:==> extended_qc_pool ran in 136.787 s
2021-10-18 17:12:17,398:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB02.txt
2021-10-18 17:13:40,244:INFO:==> extended_qc_pool ran in 82.849 s
2021-10-18 17:13:40,245:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB03.txt
2021-10-18 17:13:52,424:INFO:==> extended_qc_pool ran in 12.179 s
2021-10-18 17:13:52,426:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB04.txt
2021-10-18 17:15:53,204:INFO:==> extended_qc_pool ran in 120.78 s
2021-10-18 17:15:53,206:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB05.txt
2021-10-18 17:17:29,051:INFO:==> extended_qc_pool ran in 95.844 s
2021-10-18 17:17:29,056:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB07.txt
2021-10-18 17:19:35,376:INFO:==> extended_qc_pool ran in 126.325 s
2021-10-18 17:19:35,380:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB08.txt
2021-10-18 17:21:18,993:INFO:==> extended_qc_pool ran in 103.615 s
2021-10-18 17:21:18,995:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB09.txt
2021-10-18 17:21:20,982:INFO:==> extended_qc_pool ran in 1.989 s
2021-10-18 17:21:20,988:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB10.txt
2021-10-18 17:23:04,654:INFO:==> extended_qc_pool ran in 103.669 s
2021-10-18 17:23:04,658:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB11.txt
2021-10-18 17:24:49,316:INFO:==> extended_qc_pool ran in 104.66 s
2021-10-18 17:24:49,317:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB12.txt
2021-10-18 17:26:14,879:INFO:==> extended_qc_pool ran in 85.563 s
2021-10-18 17:26:14,881:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB13.txt
2021-10-18 17:27:18,781:INFO:==> extended_qc_pool ran in 63.902 s
2021-10-18 17:27:19,127:INFO:==> run_sequential ran in 1038.52 s
2021-10-18 17:27:19,130:INFO:start pool ...
2021-10-18 17:27:19,130:INFO:cpu: 8
2021-10-18 17:27:21,550:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB01.txt
2021-10-18 17:27:21,581:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB02.txt
2021-10-18 17:27:21,680:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB03.txt
2021-10-18 17:27:21,758:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB04.txt
2021-10-18 17:27:21,810:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB05.txt
2021-10-18 17:27:21,959:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB07.txt
2021-10-18 17:27:22,086:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB08.txt
2021-10-18 17:27:41,817:INFO:==> extended_qc_pool ran in 20.137 s
2021-10-18 17:27:41,819:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB09.txt
2021-10-18 17:27:45,590:INFO:==> extended_qc_pool ran in 3.772 s
2021-10-18 17:27:45,608:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB10.txt
2021-10-18 17:29:54,069:INFO:==> extended_qc_pool ran in 152.49 s
2021-10-18 17:29:54,075:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB11.txt
2021-10-18 17:29:55,724:INFO:==> extended_qc_pool ran in 153.915 s
2021-10-18 17:29:55,737:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB12.txt
2021-10-18 17:30:27,502:INFO:==> extended_qc_pool ran in 185.744 s
2021-10-18 17:30:27,515:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB13.txt
2021-10-18 17:30:36,060:INFO:==> extended_qc_pool ran in 194.512 s
2021-10-18 17:30:43,957:INFO:==> extended_qc_pool ran in 201.891 s
2021-10-18 17:30:44,781:INFO:==> extended_qc_pool ran in 202.824 s
2021-10-18 17:31:00,955:INFO:==> extended_qc_pool ran in 195.362 s
2021-10-18 17:32:10,120:INFO:==> extended_qc_pool ran in 136.045 s
2021-10-18 17:32:23,161:INFO:==> extended_qc_pool ran in 115.646 s
2021-10-18 17:32:23,415:INFO:==> extended_qc_pool ran in 147.678 s
2021-10-18 17:32:24,020:INFO:==> run_pool ran in 304.891 s
2021-10-18 17:32:24,027:INFO:start processes ...
2021-10-18 17:32:24,094:INFO:all processes have started ...
2021-10-18 17:32:27,595:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB05.txt
2021-10-18 17:32:27,628:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB02.txt
2021-10-18 17:32:27,656:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB08.txt
2021-10-18 17:32:27,708:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB04.txt
2021-10-18 17:32:27,748:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB07.txt
2021-10-18 17:32:27,771:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB10.txt
2021-10-18 17:32:27,771:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB11.txt
2021-10-18 17:32:27,799:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB01.txt
2021-10-18 17:32:28,184:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB09.txt
2021-10-18 17:32:28,393:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB12.txt
2021-10-18 17:32:28,742:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB13.txt
2021-10-18 17:32:30,041:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB03.txt
2021-10-18 17:32:36,264:INFO:==> extended_qc_thread_or_process ran in 8.096 s
2021-10-18 17:32:36,389:INFO:get results for vibrator: 9
2021-10-18 17:33:09,257:INFO:==> extended_qc_thread_or_process ran in 39.309 s
2021-10-18 17:33:09,371:INFO:get results for vibrator: 3
2021-10-18 17:36:08,966:INFO:==> extended_qc_thread_or_process ran in 221.374 s
2021-10-18 17:36:09,068:INFO:get results for vibrator: 5
2021-10-18 17:36:19,484:INFO:==> extended_qc_thread_or_process ran in 231.862 s
2021-10-18 17:36:19,517:INFO:get results for vibrator: 2
2021-10-18 17:36:44,191:INFO:==> extended_qc_thread_or_process ran in 256.498 s
2021-10-18 17:36:44,223:INFO:get results for vibrator: 4
2021-10-18 17:37:05,644:INFO:==> extended_qc_thread_or_process ran in 276.977 s
2021-10-18 17:37:05,652:INFO:get results for vibrator: 13
2021-10-18 17:37:11,263:INFO:==> extended_qc_thread_or_process ran in 283.51 s
2021-10-18 17:37:11,337:INFO:get results for vibrator: 10
2021-10-18 17:37:13,987:INFO:==> extended_qc_thread_or_process ran in 286.245 s
2021-10-18 17:37:14,005:INFO:get results for vibrator: 7
2021-10-18 17:37:14,263:INFO:==> extended_qc_thread_or_process ran in 286.469 s
2021-10-18 17:37:14,293:INFO:get results for vibrator: 1
2021-10-18 17:37:17,111:INFO:==> extended_qc_thread_or_process ran in 289.455 s
2021-10-18 17:37:17,118:INFO:get results for vibrator: 8
2021-10-18 17:37:18,881:INFO:==> extended_qc_thread_or_process ran in 291.122 s
2021-10-18 17:37:18,884:INFO:get results for vibrator: 11
2021-10-18 17:37:26,190:INFO:==> extended_qc_thread_or_process ran in 297.854 s
2021-10-18 17:37:26,192:INFO:get results for vibrator: 12
2021-10-18 17:37:26,196:INFO:all processes have completed ...
2021-10-18 17:37:26,582:INFO:==> run_processes ran in 302.558 s
2021-10-18 17:37:26,583:INFO:start threading ...
2021-10-18 17:37:26,584:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB01.txt
2021-10-18 17:37:26,587:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB02.txt
2021-10-18 17:37:26,589:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB03.txt
2021-10-18 17:37:26,595:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB04.txt
2021-10-18 17:37:26,604:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB05.txt
2021-10-18 17:37:26,609:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB07.txt
2021-10-18 17:37:26,620:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB08.txt
2021-10-18 17:37:26,632:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB09.txt
2021-10-18 17:37:26,644:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB10.txt
2021-10-18 17:37:26,654:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB12.txt
2021-10-18 17:37:26,661:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB13.txt
2021-10-18 17:37:26,664:INFO:run extended qc for: data_files\211006 - check VAPS\20210920\210920_VIB11.txt
2021-10-18 17:37:26,671:INFO:all threats have started ...
2021-10-18 17:37:50,140:INFO:==> extended_qc_thread_or_process ran in 23.528 s
2021-10-18 17:39:20,498:INFO:==> extended_qc_thread_or_process ran in 113.911 s
2021-10-18 17:50:11,285:INFO:==> extended_qc_thread_or_process ran in 764.7 s
2021-10-18 17:51:30,443:INFO:==> extended_qc_thread_or_process ran in 843.846 s
2021-10-18 17:52:23,355:INFO:==> extended_qc_thread_or_process ran in 896.772 s
2021-10-18 17:53:12,968:INFO:==> extended_qc_thread_or_process ran in 946.374 s
2021-10-18 17:53:46,476:INFO:==> extended_qc_thread_or_process ran in 979.855 s
2021-10-18 17:53:50,388:INFO:==> extended_qc_thread_or_process ran in 983.78 s
2021-10-18 17:53:55,209:INFO:==> extended_qc_thread_or_process ran in 988.57 s
2021-10-18 17:54:01,452:INFO:==> extended_qc_thread_or_process ran in 994.826 s
2021-10-18 17:54:03,221:INFO:==> extended_qc_thread_or_process ran in 996.617 s
2021-10-18 17:54:09,694:INFO:==> extended_qc_thread_or_process ran in 1003.066 s
2021-10-18 17:54:09,697:INFO:all threats have completed ...
2021-10-18 17:54:10,039:INFO:==> run_threading ran in 1003.457 s
2021-10-18 17:54:10,041:INFO:======================= completed ======================= 

As the documents say threading seems only useful if the process is I/O bound and multiprocessing works if you have more than 1 cpu.