public class CrawlController extends Object implements Serializable, org.springframework.context.Lifecycle, org.springframework.context.ApplicationContextAware, Checkpointable
Modifier and Type | Class and Description |
---|---|
static class |
CrawlController.State |
static class |
CrawlController.StopCompleteEvent |
Modifier and Type | Field and Description |
---|---|
protected AlertThreadGroup |
alertThreadGroup |
protected org.springframework.context.support.AbstractApplicationContext |
appCtx |
protected CandidateChain |
candidateChain
Candidate chain
|
protected DispositionChain |
dispositionChain
Disposition chain
|
protected FetchChain |
fetchChain
Fetch chain
|
protected Frontier |
frontier
The frontier to use for the crawl.
|
protected boolean |
hasStarted |
protected boolean |
isRunning |
protected boolean |
isStopComplete |
protected CrawlerLoggerModule |
loggerModule |
protected int |
maxToeThreads
Maximum number of threads processing URIs at the same time.
|
protected CrawlMetadata |
metadata |
protected boolean |
pauseAtStart
whether to pause at crawl start
|
protected int |
recorderInBufferBytes
Size in bytes of in-memory buffer to record inbound traffic.
|
protected int |
recorderOutBufferBytes
Size in bytes of in-memory buffer to record outbound traffic.
|
protected Checkpoint |
recoveryCheckpoint |
protected boolean |
runWhileEmpty
whether to keep running (without pause or finish) when frontier is empty
|
protected ConfigPath |
scratchDir
Scratch directory for temporary overflow-to-disk
|
protected SeedModule |
seeds |
protected ServerCache |
serverCache |
protected StatisticsTracker |
statisticsTracker
Statistics tracking modules.
|
Constructor and Description |
---|
CrawlController() |
Modifier and Type | Method and Description |
---|---|
boolean |
atFinish()
Evaluate if the crawl should stop because it is finished,
without actually stopping the crawl.
|
void |
beginCrawlStop()
Start the process of stopping the crawl.
|
protected void |
completePause() |
protected void |
completeStop()
Called when the last toethread exits.
|
void |
doCheckpoint(Checkpoint checkpointInProgress)
Do the actual checkpoint.
|
void |
finishCheckpoint(Checkpoint checkpointInProgress)
Cleanup/unlock; need not complete for a checkpoint to be valid.
|
void |
freeReserveMemory() |
int |
getActiveToeCount() |
CandidateChain |
getCandidateChain() |
CrawlStatus |
getCrawlExitStatus() |
DispositionChain |
getDispositionChain() |
FetchChain |
getFetchChain() |
Frontier |
getFrontier() |
String |
getFrontierReportShort() |
CrawlerLoggerModule |
getLoggerModule() |
int |
getMaxToeThreads() |
CrawlMetadata |
getMetadata() |
boolean |
getPauseAtStart() |
int |
getRecorderInBufferBytes() |
int |
getRecorderOutBufferBytes() |
boolean |
getRunWhileEmpty() |
ConfigPath |
getScratchDir() |
SeedModule |
getSeeds() |
ServerCache |
getServerCache() |
Object |
getState() |
StatisticsTracker |
getStatisticsTracker() |
int |
getToeCount() |
ToePool |
getToePool() |
String |
getToeThreadReport() |
String |
getToeThreadReportShort() |
Map<String,Object> |
getToeThreadReportShortData() |
boolean |
hasStarted() |
boolean |
isActive()
Is this crawl actively able/trying to crawl? Includes both
states RUNNING and EMPTY.
|
boolean |
isFinished() |
boolean |
isPaused()
Tell if the controller is paused
|
boolean |
isPausing() |
boolean |
isRunning() |
boolean |
isStopComplete() |
void |
killThread(int threadNumber,
boolean replace)
Kills a thread.
|
void |
logProgressStatistics(String msg)
Log to the progress statistics log.
|
void |
noteFrontierState(Frontier.State reachedState)
Receive notification from the frontier, in the frontier's own
manager thread, that the frontier has reached a new state.
|
void |
requestCrawlPause()
Stop the crawl temporarily.
|
void |
requestCrawlResume()
Resume crawl from paused state
|
void |
requestCrawlStart()
Operator requested crawl begin
|
void |
requestCrawlStop()
Operator requested for crawl to stop.
|
void |
requestCrawlStop(CrawlStatus message)
Operator requested for crawl to stop.
|
protected void |
sendCrawlStateChangeEvent(CrawlController.State newState,
CrawlStatus status)
Send crawl change event to all listeners.
|
void |
setApplicationContext(org.springframework.context.ApplicationContext applicationContext) |
void |
setCandidateChain(CandidateChain candidateChain) |
void |
setDispositionChain(DispositionChain dispositionChain) |
void |
setFetchChain(FetchChain fetchChain) |
void |
setFrontier(Frontier frontier) |
void |
setLoggerModule(CrawlerLoggerModule loggerModule) |
void |
setMaxToeThreads(int maxToeThreads) |
void |
setMetadata(CrawlMetadata provider) |
void |
setPauseAtStart(boolean pauseAtStart) |
void |
setRecorderInBufferBytes(int recorderInBufferBytes) |
void |
setRecorderOutBufferBytes(int recorderOutBufferBytes) |
void |
setRecoveryCheckpoint(Checkpoint recoveryCheckpoint)
Used to inform a bean that it should restore its state from
the given Checkpoint when launched (Lifecycle start()).
|
void |
setRunWhileEmpty(boolean runWhileEmpty) |
void |
setScratchDir(ConfigPath scratchDir) |
void |
setSeeds(SeedModule seeds) |
void |
setServerCache(ServerCache serverCache) |
void |
setStatisticsTracker(StatisticsTracker statisticsTracker) |
protected void |
setupToePool() |
void |
start() |
void |
startCheckpoint(Checkpoint checkpointInProgress)
Note a checkpoint is about to begin.
|
void |
stop() |
protected org.springframework.context.support.AbstractApplicationContext appCtx
protected CrawlMetadata metadata
protected ServerCache serverCache
protected Frontier frontier
protected ConfigPath scratchDir
protected StatisticsTracker statisticsTracker
protected SeedModule seeds
protected FetchChain fetchChain
protected DispositionChain dispositionChain
protected CandidateChain candidateChain
protected int maxToeThreads
protected boolean runWhileEmpty
protected boolean pauseAtStart
protected int recorderOutBufferBytes
protected int recorderInBufferBytes
protected CrawlerLoggerModule loggerModule
protected transient AlertThreadGroup alertThreadGroup
protected boolean isRunning
protected boolean hasStarted
protected boolean isStopComplete
protected Checkpoint recoveryCheckpoint
public void setApplicationContext(org.springframework.context.ApplicationContext applicationContext) throws org.springframework.beans.BeansException
setApplicationContext
in interface org.springframework.context.ApplicationContextAware
org.springframework.beans.BeansException
public CrawlMetadata getMetadata()
public void setMetadata(CrawlMetadata provider)
public ServerCache getServerCache()
public void setServerCache(ServerCache serverCache)
public Frontier getFrontier()
public void setFrontier(Frontier frontier)
public ConfigPath getScratchDir()
public void setScratchDir(ConfigPath scratchDir)
public StatisticsTracker getStatisticsTracker()
public void setStatisticsTracker(StatisticsTracker statisticsTracker)
public SeedModule getSeeds()
public void setSeeds(SeedModule seeds)
public FetchChain getFetchChain()
public void setFetchChain(FetchChain fetchChain)
public DispositionChain getDispositionChain()
public void setDispositionChain(DispositionChain dispositionChain)
public CandidateChain getCandidateChain()
public void setCandidateChain(CandidateChain candidateChain)
public int getMaxToeThreads()
public void setMaxToeThreads(int maxToeThreads)
public boolean getRunWhileEmpty()
public void setRunWhileEmpty(boolean runWhileEmpty)
public boolean getPauseAtStart()
public void setPauseAtStart(boolean pauseAtStart)
public int getRecorderOutBufferBytes()
public void setRecorderOutBufferBytes(int recorderOutBufferBytes)
public int getRecorderInBufferBytes()
public void setRecorderInBufferBytes(int recorderInBufferBytes)
public CrawlerLoggerModule getLoggerModule()
public void setLoggerModule(CrawlerLoggerModule loggerModule)
public void start()
start
in interface org.springframework.context.Lifecycle
public boolean isRunning()
isRunning
in interface org.springframework.context.Lifecycle
public void stop()
stop
in interface org.springframework.context.Lifecycle
protected void sendCrawlStateChangeEvent(CrawlController.State newState, CrawlStatus status)
newState
- State change we're to tell listeners about.
message
- Message on state change.
public boolean hasStarted()
public boolean isStopComplete()
public void requestCrawlStart()
protected void completeStop()
protected void completePause()
public void requestCrawlStop()
public void requestCrawlStop(CrawlStatus message)
message
-
public void beginCrawlStop()
public void requestCrawlPause()
public boolean isPaused()
public boolean isPausing()
public boolean isActive()
public boolean isFinished()
public void requestCrawlResume()
public int getActiveToeCount()
protected void setupToePool()
public int getToeCount()
ToePool.getToeCount()
public ToePool getToePool()
public void killThread(int threadNumber, boolean replace)
ToePool.killThread(int, boolean).
threadNumber
- Thread to kill.
replace
- Should thread be replaced.
ToePool.killThread(int, boolean)
public boolean atFinish()
public void freeReserveMemory()
public void logProgressStatistics(String msg)
msg
- Message to write to the progress statistics log.
public Object getState()
public CrawlStatus getCrawlExitStatus()
public String getToeThreadReport()
public String getToeThreadReportShort()
public String getFrontierReportShort()
public void noteFrontierState(Frontier.State reachedState)
reachedState
- the state the frontier has reached
public void startCheckpoint(Checkpoint checkpointInProgress)
Checkpointable
startCheckpoint
in interface Checkpointable
checkpointInProgress
- Checkpoint
public void doCheckpoint(Checkpoint checkpointInProgress) throws IOException
Checkpointable
doCheckpoint
in interface Checkpointable
checkpointInProgress
- Checkpoint
IOException
public void finishCheckpoint(Checkpoint checkpointInProgress)
Checkpointable
finishCheckpoint
in interface Checkpointable
checkpointInProgress
- Checkpoint
public void setRecoveryCheckpoint(Checkpoint recoveryCheckpoint)
Checkpointable
setRecoveryCheckpoint
in interface Checkpointable
recoveryCheckpoint
- Checkpoint
Copyright © 2003-2014 Internet Archive. All Rights Reserved.