public class CrawlJob extends Object implements Comparable<CrawlJob>, org.springframework.context.ApplicationListener<org.springframework.context.ApplicationEvent>
Modifier and Type | Class and Description |
---|---|
class |
CrawlJob.JobLogFormatter
Formatter for job.log
|
Modifier and Type | Field and Description |
---|---|
protected PathSharingContext |
ac |
protected AlertThreadGroup |
alertThreadGroup |
protected Handler |
currentLaunchJobLogHandler |
protected boolean |
isLaunchInfoPartial |
protected Logger |
jobLogger |
protected org.joda.time.DateTime |
lastLaunch |
protected int |
launchCount |
protected boolean |
needTeardown |
protected File |
primaryConfig |
protected org.joda.time.DateTime |
xmlOkAt |
Modifier and Type | Method and Description |
---|---|
protected void |
beansException(org.springframework.beans.BeansException be)
Report a BeansException during instantiation; report the chain in
reverse order (so the root cause is first); ignore non-BeansExceptions
and exceptions without a useful compact message.
|
void |
checkXML()
Is the primary XML config minimally well-formed?
|
int |
compareTo(CrawlJob o)
Sort for reverse-chronological listing.
|
protected void |
doTeardown() |
String |
elapsedReport() |
Map<String,Object> |
elapsedReportData() |
String |
frontierReport() |
Map<String,Object> |
frontierReportData() |
int |
getAlertCount() |
Object |
getBeanpathTarget(String beanPath)
Utility method for getting a bean or any other object addressable
with a 'bean path' -- a property-path string (with dots and
[]indexes) starting with a bean name.
|
CheckpointService |
getCheckpointService()
Return the configured Checkpointer instance, if there is exactly
one, otherwise null.
|
Map<String,ConfigPath> |
getConfigPaths()
Return all known ConfigPaths, as an aid to viewing or editing.
|
CrawlController |
getCrawlController() |
protected Document |
getDomDocument(File f)
Read a file to a DOM Document; return null if this isn't possible
for any reason.
|
List<File> |
getImportedConfigs(File xml)
Return all config files included via 'import' statements in the
primary config (or other included configs).
|
PathSharingContext |
getJobContext() |
File |
getJobDir() |
File |
getJobLog() |
Logger |
getJobLogger()
Get a logger to a distinguished file, job.log in the job's
directory, into which job-specific events may be reported.
|
String |
getJobStatusDescription() |
long |
getLastActivityTime() |
org.joda.time.DateTime |
getLastLaunch() |
int |
getLaunchCount() |
File |
getPrimaryConfig() |
String |
getShortName() |
protected StatisticsTracker |
getStats() |
boolean |
hasApplicationContext() |
boolean |
hasValidApplicationContext()
Did the ApplicationContext self-validate?
Returns true if validation passed without errors.
|
void |
instantiateContainer()
Can the configuration yield an assembled ApplicationContext?
|
boolean |
isLaunchable()
Is it reasonable to offer a launch button?
|
boolean |
isLaunchInfoPartial() |
boolean |
isPausable() |
boolean |
isProfile()
Is this job a 'profile' (or template), meaning it may be edited
or copied to another job, but should not be launched?
|
boolean |
isRunning() |
boolean |
isUnpausable() |
boolean |
isXmlOk()
Is the primary config file legal XML?
|
String |
jobDirRelativePath(File f)
Compute a path relative to the job directory for all contained
files, or null if the File is not inside the job directory.
|
void |
launch()
Launch a crawl into 'running' status, assembling if necessary.
|
Object |
loadReport() |
Map<String,Number> |
loadReportData() |
void |
onApplicationEvent(org.springframework.context.ApplicationEvent event)
Log note of all ApplicationEvents.
|
Object |
rateReport() |
Map<String,Number> |
rateReportData() |
protected void |
scanJobLog()
Refresh knowledge of total launched and last launch by scanning
the job.log.
|
protected String |
shortMessage(org.springframework.beans.BeansException ex)
Return a short useful message for common BeansExceptions.
|
String |
sizeTotalsReport() |
Map<String,Long> |
sizeTotalsReportData() |
protected void |
startContext()
Start the context, catching and reporting any BeansExceptions.
|
boolean |
teardown()
Ensure a fresh start for any configuration changes or relaunches,
by stopping and discarding an existing ApplicationContext.
|
void |
terminate() |
String |
threadReport() |
Map<String,Object> |
threadReportData() |
String |
uriTotalsReport() |
Map<String,Long> |
uriTotalsReportData() |
void |
validateConfiguration()
Does the assembled ApplicationContext self-validate? Any failures
are reported as WARNING log events in the job log.
|
void |
writeHtmlTo(PrintWriter pw) |
void |
writeHtmlTo(PrintWriter pw,
String uriPrefix) |
protected File primaryConfig
protected PathSharingContext ac
protected int launchCount
protected boolean isLaunchInfoPartial
protected org.joda.time.DateTime lastLaunch
protected AlertThreadGroup alertThreadGroup
protected org.joda.time.DateTime xmlOkAt
protected Logger jobLogger
protected transient Handler currentLaunchJobLogHandler
protected boolean needTeardown
public CrawlJob(File cxml)
public File getPrimaryConfig()
public File getJobDir()
public String getShortName()
public File getJobLog()
public PathSharingContext getJobContext()
public boolean isLaunchInfoPartial()
public Logger getJobLogger()
public org.joda.time.DateTime getLastLaunch()
public int getLaunchCount()
protected void scanJobLog()
public boolean isProfile()
public void writeHtmlTo(PrintWriter pw)
public void writeHtmlTo(PrintWriter pw, String uriPrefix)
public void checkXML()
protected Document getDomDocument(File f)
f
- File of XML
public boolean isXmlOk()
public void instantiateContainer()
protected void beansException(org.springframework.beans.BeansException be)
be
- BeansException
protected String shortMessage(org.springframework.beans.BeansException ex)
ex
- BeansException
public boolean hasApplicationContext()
public void validateConfiguration()
public boolean hasValidApplicationContext()
public void launch()
protected void startContext()
public int compareTo(CrawlJob o)
compareTo
in interface Comparable<CrawlJob>
Comparable.compareTo(java.lang.Object)
public long getLastActivityTime()
public boolean isRunning()
public CrawlController getCrawlController()
public boolean isPausable()
public boolean isUnpausable()
public CheckpointService getCheckpointService()
public boolean teardown()
protected void doTeardown()
public List<File> getImportedConfigs(File xml)
xml
- File to examine
public Map<String,ConfigPath> getConfigPaths()
public String jobDirRelativePath(File f)
f
- File
public void onApplicationEvent(org.springframework.context.ApplicationEvent event)
onApplicationEvent
in interface org.springframework.context.ApplicationListener<org.springframework.context.ApplicationEvent>
ApplicationListener.onApplicationEvent(org.springframework.context.ApplicationEvent)
public boolean isLaunchable()
public int getAlertCount()
protected StatisticsTracker getStats()
public Object rateReport()
public Object loadReport()
public String uriTotalsReport()
public String sizeTotalsReport()
public String elapsedReport()
public String threadReport()
public String frontierReport()
public void terminate()
public Object getBeanpathTarget(String beanPath)
beanPath
- String 'property-path' with bean name as first segment
public String getJobStatusDescription()
Copyright © 2003-2014 Internet Archive. All Rights Reserved.