so, I'm pretty new to MRJob, so don't be rude please.
I tried write a program (using ChatGPT), that checks tf*idf for specified line in 3 text files; however, i always get the same error: "No configs specified for inline runner", can somebody please explain what's the problem, since googleing didn't help.
code:
from mrjob.job import MRJob
import math
import os
import re
WORD_RE = repile(r"\b\w+\b")
class TFIDFJob(MRJob):
def configure_args(self):
super(TFIDFJob, self).configure_args()
self.add_passthru_arg('--query', type=str, help='Search query')
def mapper(self, _, line):
file_name, content = line.split("\t", 1)
words = WORD_RE.findall(content.lower())
total_words = len(words)
for word in words:
yield (word, file_name), 1
yield ("__TOTAL__", file_name), total_words
def reducer(self, key, values):
if key[0] == "__TOTAL__":
yield key[1], sum(values)
else:
word, file_name = key
word_count = sum(values)
yield (file_name, word), word_count
if __name__ == "__main__":
search_query = input("Enter query: ")
files = []
for file_name in ["file1.txt", "file2.txt", "file3.txt"]:
with open(file_name, 'r', encoding='utf-8') as f:
files.append((file_name, f.read()))
job = TFIDFJob(args=['--query', search_query])
with job.make_runner() as runner:
runner.run()
for key, value in job.parse_output(runner.cat_output()):
print(f"{key}: {value}")
and the error i get:
$ python3 hw1.py file1.txt file2.txt file3.txt
Enter query: 123
No configs specified for inline runner
^CTraceback (most recent call last):
File "/home/dh/xd/hw/hw1.py", line 42, in <module>
runner.run()
File "/home/dh/.local/lib/python3.11/site-packages/mrjob/runner.py", line 503, in run
self._run()
File "/home/dh/.local/lib/python3.11/site-packages/mrjob/sim.py", line 161, in _run
self._run_step(step, step_num)
File "/home/dh/.local/lib/python3.11/site-packages/mrjob/sim.py", line 170, in _run_step
self._run_streaming_step(step, step_num)
File "/home/dh/.local/lib/python3.11/site-packages/mrjob/sim.py", line 179, in _run_streaming_step
self._input_paths_for_step(step_num), step_num)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/dh/.local/lib/python3.11/site-packages/mrjob/sim.py", line 594, in _input_paths_for_step
for input_path_glob in self._get_input_paths()
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/dh/.local/lib/python3.11/site-packages/mrjob/runner.py", line 1083, in _get_input_paths
for line in self._stdin:
KeyboardInterrupt
thank all of you in advance.