If I've understood your question correctly, you'd like to retrieve the data from the main table at the URL
https://salesweb.civilview.com/Sales/SalesSearch?countyId=3, as well as the details data for each of the records listed on that main page.
As an example, I wrote some code that retrieves the data from the main page into a structured dataframe, in which the first column is the URL of each record's details page.
#load libraries
library(rvest)
library (tidyverse)
# assign url
# The address must stay on a single line: a line break inside the quotes
# becomes part of the string and corrupts the request.
url <- "https://salesweb.civilview.com/Sales/SalesSearch?countyId=3"

# extract td tags contents
readUrlHtml <- read_html(url) %>% html_nodes("td")

# Each table row is made of 6 consecutive <td> cells:
# details link, sheriff #, sales date, plaintiff, defendant, address.
cells_per_row <- 6
n_rows <- length(readUrlHtml) %/% cells_per_row
if (length(readUrlHtml) %% cells_per_row != 0) {
  warning(
    "Number of <td> cells is not a multiple of ", cells_per_row,
    "; trailing cells are ignored.",
    call. = FALSE
  )
}

# Text of every complete row, reshaped so that one matrix row == one record.
used_cells <- readUrlHtml[seq_len(n_rows * cells_per_row)]
cell_text <- matrix(html_text(used_cells), ncol = cells_per_row, byrow = TRUE)

# The first cell of each row holds the link to the details page. Pull the
# PropertyId out of the cell markup with a pattern instead of a fixed
# character offset, so the id length and surrounding whitespace do not matter.
link_html <- as.character(
  readUrlHtml[seq(1, by = cells_per_row, length.out = n_rows)]
)
id_match <- regexpr("(?<=PropertyId=)[^\"&]+", link_html, perl = TRUE)
property_id <- rep(NA_character_, length(link_html))
property_id[id_match > 0] <- regmatches(link_html, id_match)

details_base <- "https://salesweb.civilview.com/Sales/SaleDetails?PropertyId="

# Build the dataframe in one step (no row-by-row growth); rows whose link
# could not be found get NA in Details.
df <- data.frame(
  Details = ifelse(
    is.na(property_id), NA_character_, paste0(details_base, property_id)
  ),
  Sheriff = cell_text[, 2],
  SalesDate = cell_text[, 3],
  Plaintiff = cell_text[, 4],
  Defendant = cell_text[, 5],
  Address = cell_text[, 6],
  stringsAsFactors = FALSE
)

# values check (rows beyond nrow(df) print as NA)
df[1, ]
df[50, ]
df[525, ]
With the rvest package you can likewise retrieve the data from each details page and save it in a new dataframe.
EDIT 2019-03-29
In order to retrieve the details data you need to save the cookie information from the main URL. Once that is done, you can create a new dataframe to store the details data, as shown in the updated version of the code.
1) the new library httr is used to retrieve the cookie data
2) the details data being retrieved is the data inside the red rectangle in the screenshot (to retrieve the rest, I suggest creating a new dataframe to store the additional data, but I guess that this will greatly increase the amount of time needed to process all the data!)
3) the two dataframes df & dfDetails may be merged by using the Details key
#load libraries
library(rvest)
library (tidyverse)
library (httr) #new library
# assign url
# The address must stay on a single line: a line break inside the quotes
# becomes part of the string and corrupts the request.
url <- "https://salesweb.civilview.com/Sales/SalesSearch?countyId=3"

# extract td tags contents
readUrlHtml <- read_html(url) %>% html_nodes("td")

# Each table row is made of 6 consecutive <td> cells:
# details link, sheriff #, sales date, plaintiff, defendant, address.
cells_per_row <- 6
n_rows <- length(readUrlHtml) %/% cells_per_row
if (length(readUrlHtml) %% cells_per_row != 0) {
  warning(
    "Number of <td> cells is not a multiple of ", cells_per_row,
    "; trailing cells are ignored.",
    call. = FALSE
  )
}

# Text of every complete row, reshaped so that one matrix row == one record.
used_cells <- readUrlHtml[seq_len(n_rows * cells_per_row)]
cell_text <- matrix(html_text(used_cells), ncol = cells_per_row, byrow = TRUE)

# The first cell of each row holds the link to the details page. Pull the
# PropertyId out of the cell markup with a pattern instead of a fixed
# character offset, so the id length and surrounding whitespace do not matter.
link_html <- as.character(
  readUrlHtml[seq(1, by = cells_per_row, length.out = n_rows)]
)
id_match <- regexpr("(?<=PropertyId=)[^\"&]+", link_html, perl = TRUE)
property_id <- rep(NA_character_, length(link_html))
property_id[id_match > 0] <- regmatches(link_html, id_match)

details_base <- "https://salesweb.civilview.com/Sales/SaleDetails?PropertyId="

# Build the dataframe in one step (no row-by-row growth); rows whose link
# could not be found get NA in Details.
df <- data.frame(
  Details = ifelse(
    is.na(property_id), NA_character_, paste0(details_base, property_id)
  ),
  Sheriff = cell_text[, 2],
  SalesDate = cell_text[, 3],
  Plaintiff = cell_text[, 4],
  Defendant = cell_text[, 5],
  Address = cell_text[, 6],
  stringsAsFactors = FALSE
)

# values check (rows beyond nrow(df) print as NA)
df[1, ]
df[50, ]
df[525, ]
## UPDATED SECTION TO RETRIEVE THE URLS DETAILS ##
# Retrieve the session cookies by requesting the main page first.
# trimws() guards against stray whitespace/newlines around the address.
urlInfos <- GET(trimws(url))

# Turn the cookie table of the response (columns `name` and `value`) into a
# named character vector, which is the form set_cookies(.cookies = ) expects.
# Passing it this way sends the real cookie names; a back-quoted argument
# name would be sent literally as the cookie name instead.
main_cookies <- cookies(urlInfos)
session_cookies <- setNames(
  as.character(main_cookies$value),
  as.character(main_cookies$name)
)

# Position of each field among the <td> cells of a details page
# (odd positions are skipped; presumably they hold the labels - TODO confirm).
detail_cells <- c(
  Sheriff = 2,
  CourtCase = 4,
  SalesDate = 6,
  Plaintiff = 8,
  Defendant = 10,
  Address = 12,
  ApproxUpset = 14,
  Attorney = 16,
  AttorneyPhone = 18
)

# Pre-allocate the details dataframe with one row per record of df, so it is
# not grown row by row inside the loop. Details is the key shared with df.
# NOTE(review): Description is declared but no cell is ever assigned to it,
# so it stays NA - confirm which <td> position (if any) should feed it.
n_details <- length(df$Details)
empty_col <- rep(NA_character_, n_details)
dfDetails <- data.frame(
  Details = as.character(df$Details),
  Sheriff = empty_col,
  CourtCase = empty_col,
  SalesDate = empty_col,
  Plaintiff = empty_col,
  Defendant = empty_col,
  Address = empty_col,
  Description = empty_col,
  ApproxUpset = empty_col,
  Attorney = empty_col,
  AttorneyPhone = empty_col,
  stringsAsFactors = FALSE
)

# loop to harvest the details
# takes a while to retrieve all records! (5-6 mins)
# for (i in seq_len(min(3, n_details))) # loop through few records for testing
for (i in seq_len(n_details)) {
  responseDetail <- GET(
    trimws(df$Details[i]),
    set_cookies(.cookies = session_cookies)
  )
  readUrlHtmlDetail <- read_html(responseDetail) %>% html_nodes("td")

  # Index the extracted text rather than the node set: a position that does
  # not exist on a shorter page yields NA instead of stopping the whole run.
  detail_text <- html_text(readUrlHtmlDetail)
  dfDetails[i, names(detail_cells)] <- as.list(detail_text[detail_cells])
}

# values detail check (rows beyond nrow(dfDetails) print as NA)
dfDetails[1, ]
dfDetails[50, ]
dfDetails[525, ]