最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

Creating an Opera extension to scrape Zillow, Realtor, and Redfin and create an Excel (.csv) file with the listings - Stack Overflow

programmeradmin2浏览0评论

I'm trying to make searching for a house easier by creating an Opera extension that finds listings matching my specific requirements. I want it to generate an Excel sheet (a .csv file) containing every listing that fits the criteria I give it.

This is the code I have right now:

background.js

// Log a confirmation message when the extension's onInstalled event fires.
function logExtensionInstalled() {
    console.log("House Listings Scraper Extension Installed");
}

chrome.runtime.onInstalled.addListener(logExtensionInstalled);

// Service worker lifecycle handlers: skip the waiting phase on install and
// claim the already-open clients on activate.
self.addEventListener('install', (installEvent) => {
    const skipped = self.skipWaiting();
    installEvent.waitUntil(skipped);
});

self.addEventListener('activate', (activateEvent) => {
    const claimed = self.clients.claim();
    activateEvent.waitUntil(claimed);
});

// Handle saving listings as CSV file

/**
 * Quotes a value as a CSV text field, doubling any embedded double quotes so
 * that addresses or links containing `"` cannot break the row.
 * @param {*} value - Field value; null/undefined become an empty field.
 * @returns {string} The quoted field.
 */
function toCsvField(value) {
    return `"${String(value ?? "").replace(/"/g, '""')}"`;
}

/**
 * Builds the CSV text for the scraped listings.
 * @param {Array<{priceText: string, address: string, beds: number, baths: number, sqft: string, link: string}>} listings
 * @returns {string} Header line plus one line per listing, each newline-terminated.
 */
function buildListingsCsv(listings) {
    const header = "Price,Address,Beds,Baths,Square Feet,Link";
    const rows = listings.map((row) => [
        toCsvField(row.priceText),
        toCsvField(row.address),
        row.beds,   // numeric columns stay unquoted
        row.baths,
        toCsvField(row.sqft),
        toCsvField(row.link),
    ].join(","));
    return `${[header, ...rows].join("\n")}\n`;
}

/**
 * Message handler: on `{ action: "saveListings", data: [...] }` it converts the
 * listings to CSV and starts a download named "house_listings.csv".
 * Messages with any other action are ignored. `sender` and `sendResponse`
 * are not used; no response is sent.
 */
chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
    if (message.action !== "saveListings") {
        return;
    }

    // Guard against a missing or malformed payload instead of throwing on `.length`.
    const listings = Array.isArray(message.data) ? message.data : [];
    if (listings.length === 0) {
        console.warn("No listings found to save.");
        return;
    }

    const csvContent = buildListingsCsv(listings);

    // URL.createObjectURL is not available inside a service worker, so the CSV
    // is handed to the downloads API as a data: URL instead of a blob: URL.
    const url = `data:text/csv;charset=utf-8,${encodeURIComponent(csvContent)}`;

    chrome.downloads.download({
        url: url,
        filename: "house_listings.csv",
        saveAs: true
    });
});

content.js

(function() {
/**
 * Returns the innerText of the first element under `root` that matches one of
 * `selectors` (tried in order) and has non-empty text, or `fallback` if none does.
 * @param {{querySelector: Function}} root - Listing card element to search in.
 * @param {string[]} selectors - CSS selectors, in priority order.
 * @param {string} [fallback=""] - Value returned when nothing matches.
 * @returns {string}
 */
function firstText(root, selectors, fallback = "") {
    for (const selector of selectors) {
        const text = root.querySelector(selector)?.innerText;
        if (text) {
            return text;
        }
    }
    return fallback;
}

/**
 * Returns the first whitespace-delimited token of `text` (e.g. "3 bds" -> "3").
 * @param {string} text
 * @returns {string}
 */
function leadingToken(text) {
    return text.trim().split(/\s+/)[0];
}

/**
 * Scrapes the listing cards on the current page (realtor, zillow or redfin,
 * chosen from `window.location.hostname`) and returns the ones that have at
 * least 3 beds, at least 2 baths, a price below 700000 and an address that
 * contains one of the allowed neighborhood names.
 *
 * @returns {Array<{priceText: string, address: string, beds: number, baths: number, sqft: string, link: string}>}
 *   Matching listings; an empty array when the site is unsupported or nothing matches.
 */
function scrapeListings() {
    const allowedNeighborhoods = [
        "queens village", "bella vista", "hawthorne", "society hill",
        "washington square west", "center city east", "center city", "midtown village"
    ];

    const site = window.location.hostname;
    let listingElements;

    if (site.includes("realtor")) {
        // Class selector needs its leading dot; the previous value
        // ('ponent_property-card') was a tag selector that matches nothing.
        listingElements = document.querySelectorAll('.component_property-card');
    } else if (site.includes("zillow")) {
        listingElements = document.querySelectorAll('.property-card, .list-card-info');
    } else if (site.includes("redfin")) {
        listingElements = document.querySelectorAll('.HomeCardContainer, div[data-rf-test-name="homeCard"]');
    } else {
        console.error("Unsupported site:", site);
        return [];
    }

    console.log(`Found ${listingElements.length} listings on ${site}`);

    const listings = [];

    listingElements.forEach(listing => {
        try {
            const priceText = firstText(listing, [
                '[data-rf-test-name="homecard-price"]',
                '.price',
                '.list-card-price',
            ]);
            // Keeps digits only; abbreviated prices such as "$1.2M" are NOT expanded.
            const price = Number.parseInt(priceText.replace(/[^0-9]/g, ''), 10);

            const address = firstText(listing, [
                '.homeAddressV2',
                '.list-card-addr',
                '.address',
            ]).toLowerCase();

            const beds = Number.parseInt(leadingToken(firstText(listing, [
                '[data-rf-test-name="abp-beds"]',
                '.list-card-details li:nth-child(1)',
                '.property-meta-beds',
            ], "0")), 10);

            const baths = Number.parseFloat(leadingToken(firstText(listing, [
                '[data-rf-test-name="abp-baths"]',
                '.list-card-details li:nth-child(2)',
                '.property-meta-baths',
            ], "0")));

            const sqft = firstText(listing, [
                '[data-rf-test-name="abp-sqFt"]',
                '.list-card-details li:nth-child(3)',
                '.property-meta-sqft',
            ], "N/A");

            const link = listing.querySelector('a')?.href || "";

            // A listing passes if its address text contains ANY ONE of the
            // neighborhood names; cards whose address omits the neighborhood
            // are filtered out here.
            const inNeighborhood = allowedNeighborhoods.some(neighborhood => address.includes(neighborhood));

            // A NaN price/beds/baths (unparseable text) fails these comparisons
            // and the listing is dropped.
            if (beds >= 3 && baths >= 2 && price < 700000 && inNeighborhood) {
                listings.push({ priceText, address, beds, baths, sqft, link });
            }
        } catch (error) {
            // One malformed card must not abort the whole scrape.
            console.error("Error processing listing:", error, listing);
        }
    });

    return listings;
}

let listings = scrapeListings();
console.log("Scraped Listings:", listings);
chrome.runtime.sendMessage({ action: "saveListings", data: listings });})();

manifest.json

{
"manifest_version": 3,
"name": "House Listings Scraper",
"version": "1.0",
"permissions": ["activeTab", "tabs", "downloads"],
"host_permissions": ["https://*.realtor.com/*", "https://*.zillow.com/*", "https://*.redfin.com/*"],
"background": {
    "service_worker": "background.js"
},
"action": {
    "default_title": "Scrape Listings"
},
"content_scripts": [
    {
        "matches": ["https://*.realtor.com/*", "https://*.zillow.com/*", "https://*.redfin.com/*"],
        "js": ["content.js"]
    }
]}

It currently runs, but it doesn't find any listings even though there are matching ones that I can find manually.

I'm thinking that the error is in this section:

// Lowercase neighborhood names, one entry per line.
const allowedNeighborhoods = [
    "queens village",
    "bella vista",
    "hawthorne",
    "society hill",
    "washington square west",
    "center city east",
    "center city",
    "midtown village",
];

It's possible that this makes it look for a listing that is in all of these neighborhoods at once, instead of checking the neighborhoods one after the other.

与本文相关的文章

发布评论

评论列表(0)

  1. 暂无评论